#######################################################################################################
# PROJECT LECTURER: PROF. ADAM ZAGDANSKI #
# COURSE TITLE: DATA MINING #
# PROJECT TOPIC: CLUSTER ANALYSIS OF BREAST CANCER CELL: AN APPLICATION OF PARTITION AND #
# HIERARCHICAL APPROACH #
# STUDENTS AND ID: SEGUN LIGHT JEGEDE (257389) and ISAAC AKOJI PAUL (257388) #
#######################################################################################################
## LOADING THE DATA INTO R
#Reading the data into r and renaming the variable names in a readable manner.
# Readable names for the 11 columns; the data file itself has no header row.
col.names <- c(
  "id_number", "clump_thickness", "uniformity_cell_size", "uniformity_cell_shape",
  "marginal_adhesion", "single_epithelial_cell_size", "bare_nuclei",
  "bland_chromatin", "normal_nucleoli", "mitoses", "class"
)
# Read the raw data and apply the column names above.
bcw <- read.csv(
  "C:/Users/jeged/Downloads/breast-cancer-wisconsin.data",
  header = FALSE,
  col.names = col.names
)
#View(bcw)
# NOTE: bcw is deliberately NOT attach()ed. attach() copies the columns onto the
# search path at call time, so bare column names would keep pointing at the raw,
# uncleaned values after bcw is modified below. Always use bcw$<column>.
## DATA PREPARATION AND CLEANING
library(DataExplorer)
# Check the data type of each column as read from the file
str(bcw)
## 'data.frame': 699 obs. of 11 variables:
## $ id_number : int 1000025 1002945 1015425 1016277 1017023 1017122 1018099 1018561 1033078 1033078 ...
## $ clump_thickness : int 5 5 3 6 4 8 1 2 2 4 ...
## $ uniformity_cell_size : int 1 4 1 8 1 10 1 1 1 2 ...
## $ uniformity_cell_shape : int 1 4 1 8 1 10 1 2 1 1 ...
## $ marginal_adhesion : int 1 5 1 1 3 8 1 1 1 1 ...
## $ single_epithelial_cell_size: int 2 7 2 3 2 7 2 2 2 2 ...
## $ bare_nuclei : chr "1" "10" "2" "4" ...
## $ bland_chromatin : int 3 3 3 3 3 9 3 3 1 2 ...
## $ normal_nucleoli : int 1 2 1 7 1 7 1 1 1 1 ...
## $ mitoses : int 1 1 1 1 1 1 1 1 5 1 ...
## $ class : int 2 2 2 2 2 4 2 2 2 2 ...
# Recode the class column as a factor and relabel its codes: "2" = benign, "4" = malignant.
bcw$class <- as.factor(bcw$class)
levels(bcw$class)[levels(bcw$class) == "2"] <- "benign"
levels(bcw$class)[levels(bcw$class) == "4"] <- "malignant"
# Convert the nine feature columns (2 to 10) to numeric, one column at a time.
# lapply() works on each column directly instead of first coercing the whole
# data frame to a character matrix, as apply() does. Entries that are not valid
# numbers become NA; the resulting coercion warning is expected and suppressed.
bcw[2:10] <- suppressWarnings(
  lapply(bcw[2:10], function(x) as.numeric(as.character(x)))
)
# id_number is only an identifier, so store it as a string rather than a number.
bcw$id_number <- as.character(bcw$id_number)
str(bcw) # Every attribute now has its intended type.
## 'data.frame': 699 obs. of 11 variables:
## $ id_number : chr "1000025" "1002945" "1015425" "1016277" ...
## $ clump_thickness : num 5 5 3 6 4 8 1 2 2 4 ...
## $ uniformity_cell_size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ uniformity_cell_shape : num 1 4 1 8 1 10 1 2 1 1 ...
## $ marginal_adhesion : num 1 5 1 1 3 8 1 1 1 1 ...
## $ single_epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ bare_nuclei : num 1 10 2 4 1 10 10 1 1 1 ...
## $ bland_chromatin : num 3 3 3 3 3 9 3 3 1 2 ...
## $ normal_nucleoli : num 1 2 1 7 1 7 1 1 1 1 ...
## $ mitoses : num 1 1 1 1 1 1 1 1 5 1 ...
## $ class : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
# Handle missing data: count the missing values first and, because there are only a few,
# delete the rows that contain them.
t(introduce(bcw))
## [,1]
## rows 699
## columns 11
## discrete_columns 2
## continuous_columns 9
## all_missing_columns 0
## total_missing_values 16
## complete_rows 683
## total_observations 7689
## memory_usage 98752
sum(is.na(bcw)) # total number of missing cells
## [1] 16
plot_intro(bcw)

plot_missing(bcw)

bcw<-na.omit(bcw) # 2.29% of the bare_nuclei measurements are missing, so every row with a missing value is removed.
sum(is.na(bcw))
## [1] 0
plot_missing(bcw)

nrow(bcw) # The data shrinks from 699 to 683 rows, i.e. about 2.3% of the observations are lost.
## [1] 683
#View(bcw)
##EXPLORATORY DATA ANALYSIS
#Describing the Grouping Variable
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Number of records in each diagnosis class and each class's share of the total.
# dplyr::count() returns an ungrouped table, so no `.groups` message is emitted.
bcwnew <- bcw %>%
  dplyr::count(class, name = "count") %>%
  mutate(percentage = count / sum(count))
# Bar chart of the class shares, labelled with the percentage inside each bar.
ggplot(bcwnew, aes(class, percentage, fill = class)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = scales::percent(percentage)), position = position_stack(vjust = .5)) +
  scale_y_continuous(labels = scales::percent)

#Describing the features
# Descriptive statistics for every column of a numeric data frame.
#
# df: a data frame (or anything as.data.frame() accepts) whose columns are all
#     numeric.
# Returns a numeric matrix with one column per column of df and nine rows:
# min, Q1, median, mean, Q3, max, var, sd and IQR.
my.summary <- function(df) {
  # Normalise the input so that [[i]] always extracts one plain column vector,
  # including when a tibble is passed in.
  df <- as.data.frame(df)
  stat_names <- c("min", "Q1", "median", "mean", "Q3", "max", "var", "sd", "IQR")
  results <- matrix(
    NA_real_,
    nrow = length(stat_names),
    ncol = ncol(df),
    dimnames = list(stat_names, names(df))
  )
  # seq_len() yields an empty sequence for a zero-column input, whereas
  # 1:ncol(df) would count down from 1 to 0.
  for (i in seq_len(ncol(df))) {
    X <- df[[i]]
    results[, i] <- c(
      min(X), quantile(X, 0.25), median(X), mean(X), quantile(X, 0.75),
      max(X), var(X), sd(X), IQR(X)
    )
  }
  results
}
# Summary statistics of the nine feature columns (columns 2 to 10), then print the table.
ms<-my.summary(bcw[,2:10])
ms
## clump_thickness uniformity_cell_size uniformity_cell_shape
## min 1.000000 1.000000 1.000000
## Q1 2.000000 1.000000 1.000000
## median 4.000000 1.000000 1.000000
## mean 4.442167 3.150805 3.215227
## Q3 6.000000 5.000000 5.000000
## max 10.000000 10.000000 10.000000
## var 7.956694 9.395113 8.931615
## sd 2.820761 3.065145 2.988581
## IQR 4.000000 4.000000 4.000000
## marginal_adhesion single_epithelial_cell_size bare_nuclei
## min 1.000000 1.000000 1.000000
## Q1 1.000000 2.000000 1.000000
## median 1.000000 2.000000 1.000000
## mean 2.830161 3.234261 3.544656
## Q3 4.000000 4.000000 6.000000
## max 10.000000 10.000000 10.000000
## var 8.205717 4.942109 13.277695
## sd 2.864562 2.223085 3.643857
## IQR 3.000000 2.000000 5.000000
## bland_chromatin normal_nucleoli mitoses
## min 1.000000 1.000000 1.000000
## Q1 2.000000 1.000000 1.000000
## median 3.000000 1.000000 1.000000
## mean 3.445095 2.869693 1.603221
## Q3 5.000000 4.000000 1.000000
## max 10.000000 10.000000 10.000000
## var 6.001013 9.318772 3.002160
## sd 2.449697 3.052666 1.732674
## IQR 3.000000 3.000000 0.000000
# Export the summary table as comma-separated text. The row names hold the
# statistic labels (min, Q1, ...), so they are written as well; col.names = NA
# adds an empty header cell above them so the header lines up with the data.
write.table(ms, file = "summary statistics.txt", sep = ",", quote = FALSE,
            row.names = TRUE, col.names = NA)
#construct the plots three by three
#construct the histogram plots
library(ggpubr)
# Overlaid per-class histogram of one feature of `bcw`.
# feature: the column name as a string. The x-axis label is set explicitly so it
# shows the column name rather than the `.data[[feature]]` expression.
class_histogram <- function(feature) {
  ggplot(bcw, aes(x = .data[[feature]], fill = class)) +
    geom_histogram(position = "identity", alpha = 0.4) +
    labs(x = feature)
}

# Panels A-D
ha <- class_histogram("clump_thickness")
hb <- class_histogram("uniformity_cell_size")
hc <- class_histogram("uniformity_cell_shape")
hd <- class_histogram("marginal_adhesion")
ggarrange(ha, hb, hc, hd, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Panels E-H
he <- class_histogram("single_epithelial_cell_size")
hf <- class_histogram("bare_nuclei")
hg <- class_histogram("bland_chromatin")
hh <- class_histogram("normal_nucleoli")
ggarrange(he, hf, hg, hh, labels = c("E", "F", "G", "H"), ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Panel I: placed in the same 2 x 2 grid so it is drawn at the same size as the panels above
hi <- class_histogram("mitoses")
ggarrange(hi, labels = c("I"), ncol = 2, nrow = 2) #, common.legend = TRUE, legend="bottom")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# plot_histogram(bcw)  # DataExplorer one-call alternative (not run)
# Density plots followed by normal Q-Q plots of the nine features,
# drawn three features at a time (columns 2-4, 5-7 and 8-10 of bcw).
feature_groups <- list(2:4, 5:7, 8:10)

# construct the density plots
for (cols in feature_groups) {
  plot_density(bcw[cols])
}

# construct the normal qq plots
for (cols in feature_groups) {
  plot_qq(bcw[cols])
}


# construct the boxplots of each feature, split by class

# Boxplot of one feature of `bcw` for each diagnosis class.
# feature: the column name as a string. The y-axis label is set explicitly so it
# shows the column name rather than the `.data[[feature]]` expression.
class_boxplot <- function(feature) {
  ggplot(bcw, aes(x = class, y = .data[[feature]])) +
    geom_boxplot(aes(color = class)) +
    scale_color_manual(values = c("#00AFBB", "#E7B800")) +
    labs(y = feature)
}

# Panels A-D
bpa <- class_boxplot("clump_thickness")
bpb <- class_boxplot("uniformity_cell_size")
bpc <- class_boxplot("uniformity_cell_shape")
bpd <- class_boxplot("marginal_adhesion")
ggarrange(bpa, bpb, bpc, bpd, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")

# Panels E-H
bpe <- class_boxplot("single_epithelial_cell_size")
bpf <- class_boxplot("bare_nuclei")
bpg <- class_boxplot("bland_chromatin")
bph <- class_boxplot("normal_nucleoli")
ggarrange(bpe, bpf, bpg, bph, labels = c("E", "F", "G", "H"), ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")

# Panel I: placed in the same 2 x 2 grid so it is drawn at the same size as the panels above
bpi <- class_boxplot("mitoses")
ggarrange(bpi, labels = c("I"), ncol = 2, nrow = 2) #, common.legend = TRUE, legend="bottom")

# plot_boxplot(bcw, by="class")  # DataExplorer one-call alternative (not run)
#Construct Boxplot without grouping
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
# Long format: one row per (cell, feature) pair, so all features fit in one boxplot panel.
bcwData <- melt(bcw)
## Using id_number, class as id variables
# Enlarge the bottom and left margins so the rotated feature names are not clipped.
par(mar = c(10, 7, 1, 1))
boxplot(value ~ variable, data = bcwData, las = 2)

#pairs(bcw[2:10], pch = 21, bg = c("#d95f02", "#7570b3")[unclass(bcw$class)])
# Correlation plot restricted to the continuous columns
plot_correlation(bcw, type = "continuous")

#we can also do some data preparation here; it is an error if a measurement does not fall within 1-10, i.e. the minimum and maximum
## NORMALIZATION
# Min-max scale a numeric vector onto [0, 1].
# A constant vector has zero range; it is mapped to all zeros instead of the
# NaN values a 0/0 division would give.
min_max_scale <- function(x) {
  rng <- range(x)
  if (diff(rng) == 0) {
    return(rep(0, length(x)))
  }
  (x - rng[1]) / (rng[2] - rng[1])
}
# Normalized copy of the data: the ID and class columns are carried over
# unchanged and the nine feature columns (2 to 10) are rescaled. The feature
# columns keep their original names.
bcw1 <- data.frame(
  ID = bcw$id_number,
  lapply(bcw[2:10], min_max_scale),
  class = bcw$class,
  row.names = NULL
)
str(bcw1)
## 'data.frame': 683 obs. of 11 variables:
## $ ID : chr "1000025" "1002945" "1015425" "1016277" ...
## $ clump_thickness : num 0.444 0.444 0.222 0.556 0.333 ...
## $ uniformity_cell_size : num 0 0.333 0 0.778 0 ...
## $ uniformity_cell_shape : num 0 0.333 0 0.778 0 ...
## $ marginal_adhesion : num 0 0.444 0 0 0.222 ...
## $ single_epithelial_cell_size: num 0.111 0.667 0.111 0.222 0.111 ...
## $ bare_nuclei : num 0 1 0.111 0.333 0 ...
## $ bland_chromatin : num 0.222 0.222 0.222 0.222 0.222 ...
## $ normal_nucleoli : num 0 0.111 0 0.667 0 ...
## $ mitoses : num 0 0 0 0 0 ...
## $ class : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
#View(bcw1)
#Boxplot of the normalized data
bcw1Data <- melt(bcw1)
## Using ID, class as id variables
par(mar=c(10,7,1,1))
boxplot(data=bcw1Data, value~variable, las=2)

## Clustering Start
library(stats)
library(cluster)
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(rgl)
library(scatterplot3d)
# Observation labels: the class followed by the ID, separated by a space.
bcw.names <- paste(bcw[["class"]], bcw[["id_number"]])
# True class labels (column 11), kept apart from the features.
bcw.real.class.labels <- bcw[["class"]]
# The nine feature columns only, without ID and class labels.
bcw.features <- bcw[, 2:10]
## ASSESSING CLUSTERING TENDENCY
# Seed the RNG so the uniform reference data below is identical on every run.
set.seed(123)
# Reference data without cluster structure: each feature is redrawn uniformly
# between the minimum and maximum observed for it in the breast cancer data.
random_df <- apply(bcw.features, 2, function(x) runif(length(x), min(x), max(x)))
random_df <- as.data.frame(random_df)
# PCA projection of the real data, with points coloured by their true class
fviz_pca_ind(prcomp(bcw.features), title = "PCA - Breast Cancer data", habillage = bcw.real.class.labels, palette = "jco",
             geom = "point", ggtheme = theme_classic(), legend = "bottom") # Plot bcw data set

# PCA projection of the uniform reference data, for visual comparison
fviz_pca_ind(prcomp(random_df), title = "PCA - Random data", geom = "point", ggtheme = theme_classic()) # Plot the random df

# Hopkins statistic of the real data and of the uniform reference data
res.bcw.real <- get_clust_tendency(bcw.features, n = nrow(bcw.features)-1, graph = FALSE)
res.bcw.real$hopkins_stat
## [1] 0.7111959
res.bcw.random <- get_clust_tendency(random_df, n = nrow(random_df)-1, graph = FALSE)
res.bcw.random$hopkins_stat
## [1] 0.4986158
# (The value above was recorded from an unseeded run; the seeded value may differ slightly.)
## Selecting the Optimal Number of Clusters
# Naive approach, the 'elbow method': plot the within-cluster sum of squares ("wss") for up to
# k.max = 10 clusters and look for a strong bend in the chart, the so-called "elbow" or "knee".
# The dashed vertical line marks k = 2. The same plot is drawn for four clustering methods.
fviz_nbclust(bcw.features, FUNcluster = kmeans, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) #KMeans

fviz_nbclust(bcw.features, FUNcluster = cluster::pam, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) #PAM

fviz_nbclust(bcw.features, FUNcluster = cluster::clara, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) #CLARA

fviz_nbclust(bcw.features, FUNcluster = hcut, method="wss", k.max=10) + geom_vline(xintercept=2, linetype=2) # hierarchical clustering

# A more advanced criterion for the optimal K: the silhouette method, for the same four clustering methods.
fviz_nbclust(bcw.features, FUNcluster = kmeans, method = "silhouette") #KMeans

fviz_nbclust(bcw.features, FUNcluster = cluster::pam, method = "silhouette") # PAM

fviz_nbclust(bcw.features, FUNcluster = cluster::clara, method = "silhouette") # CLARA

fviz_nbclust(bcw.features, FUNcluster = hcut, method = "silhouette") # hierarchical clustering

# Using NbClust: compute all available indices (index = "all") for 2 to 10 clusters, with the
# "complete" method on Euclidean distances. The printed summary applies a majority rule over the indices.
library(NbClust)
NbClust.results.1 <- NbClust(bcw.features, distance="euclidean", min.nc=2, max.nc=10, method="complete", index="all")

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 10 proposed 2 as the best number of clusters
## * 2 proposed 3 as the best number of clusters
## * 7 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 3 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
# Value of every index for each candidate number of clusters (one row per number of clusters, 2 to 10)
NbClust.results.1$All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 11.9005 717.2956 19.4655 16.7361 2217.346 1.349710e+30 5370507 23592.81
## 3 0.2747 378.0760 247.6635 3.0523 2459.171 2.131352e+30 5087813 22937.18
## 4 22.0334 425.7778 43.6937 10.4944 3219.390 1.244910e+30 3498839 16813.52
## 5 0.3649 350.2892 53.8250 9.9600 3579.914 1.147398e+30 3039867 15796.98
## 6 1.4747 312.7813 41.3879 11.2960 3914.334 1.012584e+30 2748377 14635.13
## 7 1.2650 283.0651 34.6282 11.7409 4292.956 7.917215e+29 2372785 13791.97
## 8 0.7710 259.6181 36.8175 12.0621 4538.338 7.219831e+29 2094970 13119.90
## 9 4.1627 243.7969 20.5218 12.8003 4855.276 5.745171e+29 1824134 12441.29
## 10 1.4853 225.2522 11.3643 13.5573 5039.868 5.413058e+29 1692788 12073.68
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 59.3419 4.7661 0.3069 0.9217 0.5516 0.9366 11.1766 0.4043 0.4840
## 3 60.5916 4.9023 0.3112 1.3884 0.5187 0.5381 441.1776 5.1442 0.4095
## 4 67.3357 6.6878 0.2299 1.6997 0.4882 0.8899 19.4300 0.7385 0.3926
## 5 70.8713 7.1181 0.2241 2.1068 0.4804 0.7521 28.6715 1.9565 0.3567
## 6 73.7998 7.6832 0.2342 1.8653 0.4855 0.7609 21.3667 1.8595 0.3328
## 7 79.0534 8.1529 0.2329 1.7560 0.4854 0.7930 17.7512 1.5449 0.3117
## 8 82.3080 8.5706 0.2335 1.7606 0.4577 0.7257 21.5471 2.2308 0.2936
## 9 88.0672 9.0380 0.2458 1.7441 0.4610 0.5837 12.1247 4.0449 0.2786
## 10 90.4255 9.3132 0.2475 1.6303 0.4439 0.5531 4.8480 4.1589 0.2660
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 11796.407 0.7681 0.5460 0.2213 0.1638 0 0.3723 4.9496 0.6338
## 3 7645.728 0.7727 0.6667 0.2246 0.1679 0 0.4700 4.8947 0.7078
## 4 4203.379 0.8407 0.5600 0.3034 0.1701 0 0.5420 4.1351 0.6477
## 5 3159.396 0.8515 0.4360 0.3135 0.1639 0 0.6668 4.0377 0.6518
## 6 2439.188 0.8557 0.4960 0.3148 0.1765 0 0.5972 3.9161 0.6226
## 7 1970.281 0.8579 0.4395 0.3158 0.1790 0 0.5956 3.8208 0.5794
## 8 1639.987 0.8601 0.6144 0.3162 0.1826 0 0.5877 3.7485 0.5768
## 9 1382.366 0.8610 0.3082 0.3173 0.1949 0 0.5738 3.6622 0.5385
## 10 1207.368 0.8612 0.2006 0.3173 0.1965 0 0.5449 3.6204 0.5034
# For each index: the number of clusters it proposes and the index value at that number
NbClust.results.1$Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 4.0000 2.0000 3.000 2.0000 4.0000 4.000000e+00 4
## Value_Index 22.0334 717.2956 228.198 16.7361 760.2184 7.889303e+29 1588974
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 4.000 4.0000 4.0000 5.0000 2.0000 2.0000 2.0000
## Value_Index 5107.132 6.7441 -1.3551 0.2241 0.9217 0.5516 0.9366
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 2.0000 2.0000 2.000 3.000 10.0000 1 2.0000
## Value_Index 11.1766 0.4043 0.484 4150.679 0.8612 NA 0.2213
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 10.0000 0 2.0000 0 10.0000
## Value_Index 0.1965 0 0.3723 0 0.5034
# Cluster assignment (1 or 2) of every observation under the best partition, named by row name
NbClust.results.1$Best.partition
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1
## 21 22 23 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 42
## 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1
## 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
## 2 1 2 1 2 1 1 2 1 1 2 2 2 1 2 1 1 1 1 1
## 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
## 2 1 1 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1
## 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
## 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1
## 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
## 1 1 2 1 2 2 1 2 1 1 1 2 1 1 1 2 1 1 1 1
## 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 141 142 143
## 2 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1
## 144 145 147 148 149 150 151 152 153 154 155 156 157 158 160 161 162 163 164 166
## 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 2 1 1 1 1
## 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
## 2 2 1 1 1 1 1 2 2 2 1 2 1 1 1 1 1 2 2 1
## 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
## 2 2 2 1 2 2 1 1 1 1 2 1 1 1 2 2 1 1 1 2
## 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
## 2 1 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2 2 1
## 227 228 229 230 231 232 233 234 235 237 238 239 240 241 242 243 244 245 246 247
## 2 2 1 2 2 2 1 1 1 2 2 2 1 1 1 1 1 1 1 2
## 248 249 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
## 1 1 1 1 1 2 2 1 1 1 1 1 2 2 2 1 1 1 2 1
## 269 270 271 272 273 274 275 277 278 279 280 281 282 283 284 285 286 287 288 289
## 2 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 1 1
## 290 291 292 294 296 297 299 300 301 302 303 304 305 306 307 308 309 310 311 312
## 2 1 1 1 2 1 1 1 2 1 2 1 1 2 1 1 2 1 1 1
## 313 314 315 317 318 319 320 321 323 324 325 326 327 328 329 330 331 332 333 334
## 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1
## 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
## 1 1 1 2 2 1 2 2 1 1 1 1 2 2 1 1 1 1 1 1
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
## 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1
## 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 413 414 415
## 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2
## 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
## 1 2 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
## 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1
## 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
## 1 1 2 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
## 1 1 1 1 2 1 1 2 2 1 1 1 2 1 1 1 2 1 2 1
## 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
## 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2
## 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535
## 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555
## 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1
## 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
## 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 2 1 1 2
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
## 1 1 1 1 1 1 2 2 1 1 1 2 1 2 1 2 2 1 1 2
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
## 1 1 1 1 1 1 1 1 2 1 2 1 1 2 1 1 2 2 1 1
## 616 617 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656
## 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
## 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
## 1 1 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1
## 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
## 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## 697 698 699
## 2 2 2
# Plot how many indices proposed each number of clusters. The "0" and "1" entries in the printed
# tally match the indices whose Best.nc entry is 0 (Hubert, Dindex) or 1 (Frey), not real proposals.
factoextra::fviz_nbclust(NbClust.results.1) + theme_minimal() + ggtitle("Optimal number of clusters")
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first element
## will be used
## Warning in if (class(best_nc) == "matrix") .viz_NbClust(x, print.summary, : the
## condition has length > 1 and only the first element will be used
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first element
## will be used
## Warning in if (class(best_nc) == "matrix") {: the condition has length > 1 and
## only the first element will be used
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 1 proposed 1 as the best number of clusters
## * 10 proposed 2 as the best number of clusters
## * 2 proposed 3 as the best number of clusters
## * 7 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 3 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 2 .

## Internal Cluster Validation
library(clValid)
library(mclust)
## Package 'mclust' version 5.4.7
## Type 'citation("mclust")' for citing this R package in publications.
##
## Attaching package: 'mclust'
## The following object is masked from 'package:purrr':
##
## map
# Internal and stability validation with clValid. This code is not executed.
# It is kept as ordinary comments rather than inside a string literal, because R
# evaluates a bare string literal and echoes it as one long character value.
#
# methods <- c("agnes","kmeans", "diana", "pam", "clara")
# K.range <- 2:5 # range for number of clusters
# internal.validation <- clValid(bcw.features, nClust=K.range, clMethods=methods, validation="internal")
# y   # NOTE(review): presumably the answer typed at clValid's interactive prompt - confirm
# summary(internal.validation)
# optimalScores(internal.validation)
# par(mfrow = c(2, 2))
# plot(internal.validation, legend = FALSE, lwd=2)
# plot.new()
# legend("center", clusterMethods(internal.validation), col=1:9, lty=1:9, pch=paste(1:9))
#
# stability.validation <- clValid(bcw.features, nClust=K.range, clMethods=methods, validation="stability")
# y   # NOTE(review): presumably the answer typed at clValid's interactive prompt - confirm
# summary(stability.validation)
# optimalScores(stability.validation)
# par(mfrow = c(2,2))
# plot(stability.validation, measure=c("APN","AD","ADM"), legend=FALSE, lwd=2)
# plot.new()
# legend("center", clusterMethods(stability.validation), col=1:9, lty=1:9, pch=paste(1:9))
## PCA
# PCA on the nine feature columns. The variables are centred and scaled to unit
# variance first, and the rotated data (scores) are kept in the result.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
bcw.pca <- bcw[, 2:10]
bcw.after.pca <- prcomp(bcw.pca, retx = TRUE, center = TRUE, scale. = TRUE)
library(factoextra)
fviz_eig(bcw.after.pca) # The knee (scree) plot for PCA

# Variable plot, with each variable coloured by its contribution to the PCs
fviz_pca_var(bcw.after.pca,
  col.var = "contrib", # Color by contributions to the PC
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)
# Results for the variables: extract the per-variable PCA results (coord, contrib and cos2 are used below)
res.bcw.pca <- get_pca_var(bcw.after.pca)
res.bcw.pca$coord # Coordinates of each variable on each dimension
## Dim.1 Dim.2 Dim.3 Dim.4
## clump_thickness -0.7336763 -0.12402816 0.6362101794 -0.073103185
## uniformity_cell_size -0.9249037 -0.04108445 -0.0146410842 0.138476643
## uniformity_cell_shape -0.9171059 -0.07260418 0.0246083043 0.119229545
## marginal_adhesion -0.8081485 -0.04588881 -0.3030226066 -0.334350435
## single_epithelial_cell_size -0.8166750 0.14482028 -0.0644326696 0.289748243
## bare_nuclei -0.8138416 -0.23013886 0.0005077785 -0.338041989
## bland_chromatin -0.8397818 -0.20090791 -0.1564667443 -0.008845551
## normal_nucleoli -0.8151140 0.02991976 -0.0985836639 0.282785536
## mitoses -0.5591457 0.79768592 0.0591084558 -0.175582935
## Dim.5 Dim.6 Dim.7 Dim.8
## clump_thickness 0.04953131 -0.13324709 -0.004620507 0.12648504
## uniformity_cell_size -0.08981904 -0.07638857 -0.111466349 -0.22278472
## uniformity_cell_shape -0.06684128 -0.04094765 -0.069022299 -0.29755368
## marginal_adhesion -0.01206751 -0.35967455 0.067189001 0.08345318
## single_epithelial_cell_size -0.39262650 0.03808059 0.114496139 0.23420699
## bare_nuclei -0.07694312 0.33472577 0.218549434 -0.06467187
## bland_chromatin 0.14039350 0.16422401 -0.380038688 0.19593569
## normal_nucleoli 0.42562850 0.01182280 0.249473012 0.03779217
## mitoses 0.06477553 0.08150570 -0.071685215 -0.02733719
## Dim.9
## clump_thickness -0.0008167946
## uniformity_cell_size -0.2179785924
## uniformity_cell_shape 0.1984374718
## marginal_adhesion 0.0136811963
## single_epithelial_cell_size 0.0198861241
## bare_nuclei -0.0227459864
## bland_chromatin 0.0185038372
## normal_nucleoli -0.0065638440
## mitoses 0.0022285395
res.bcw.pca$contrib # Contributions of each variable to each PC, in percent (each column sums to 100)
## Dim.1 Dim.2 Dim.3 Dim.4
## clump_thickness 9.124180 1.9824789 7.506012e+01 1.1626972
## uniformity_cell_size 14.500329 0.2175319 3.975159e-02 4.1720268
## uniformity_cell_shape 14.256857 0.6793463 1.122978e-01 3.0928711
## marginal_adhesion 11.070498 0.2713824 1.702778e+01 24.3219182
## single_epithelial_cell_size 11.305333 2.7028802 7.698751e-01 18.2656722
## bare_nuclei 11.227024 6.8257112 4.781418e-05 24.8619583
## bland_chromatin 11.954124 5.2019008 4.539961e+00 0.0170233
## normal_nucleoli 11.262157 0.1153677 1.802262e+00 17.3983643
## mitoses 5.299499 82.0034006 6.478989e-01 6.7074686
## Dim.5 Dim.6 Dim.7 Dim.8
## clump_thickness 0.64515022 5.88147462 0.007251659 6.1358900
## uniformity_cell_size 2.12147586 1.93298082 4.220323512 19.0357528
## uniformity_cell_shape 1.17487272 0.55542925 1.618218004 33.9570458
## marginal_adhesion 0.03829449 42.85388274 1.533396807 2.6710683
## single_epithelial_cell_size 40.53782907 0.48037256 4.452868511 21.0377340
## bare_nuclei 1.55682870 37.11496662 16.223986031 1.6040952
## bland_chromatin 5.18316779 8.93396163 49.058448484 14.7240179
## normal_nucleoli 47.63900579 0.04630327 21.140016973 0.5477757
## mitoses 1.10337536 2.20062849 1.745490019 0.2866203
## Dim.9
## clump_thickness 7.548417e-04
## uniformity_cell_size 5.375983e+01
## uniformity_cell_shape 4.455306e+01
## marginal_adhesion 2.117768e-01
## single_epithelial_cell_size 4.474355e-01
## bare_nuclei 5.853825e-01
## bland_chromatin 3.873948e-01
## normal_nucleoli 4.874686e-02
## mitoses 5.619153e-03
res.bcw.pca$cos2 # Quality of representation: the squared coordinate of each variable on each dimension
## Dim.1 Dim.2 Dim.3 Dim.4
## clump_thickness 0.5382809 0.0153829836 4.047634e-01 5.344076e-03
## uniformity_cell_size 0.8554468 0.0016879317 2.143613e-04 1.917578e-02
## uniformity_cell_shape 0.8410832 0.0052713666 6.055686e-04 1.421568e-02
## marginal_adhesion 0.6531039 0.0021057833 9.182270e-02 1.117902e-01
## single_epithelial_cell_size 0.6669580 0.0209729147 4.151569e-03 8.395404e-02
## bare_nuclei 0.6623382 0.0529638934 2.578390e-07 1.142724e-01
## bland_chromatin 0.7052335 0.0403639873 2.448184e-02 7.824378e-05
## normal_nucleoli 0.6644109 0.0008951922 9.718739e-03 7.996766e-02
## mitoses 0.3126439 0.6363028325 3.493810e-03 3.082937e-02
## Dim.5 Dim.6 Dim.7 Dim.8
## clump_thickness 0.0024533504 0.0177547869 2.134908e-05 0.0159984655
## uniformity_cell_size 0.0080674599 0.0058352139 1.242475e-02 0.0496330332
## uniformity_cell_shape 0.0044677570 0.0016767101 4.764078e-03 0.0885381946
## marginal_adhesion 0.0001456247 0.1293657803 4.514362e-03 0.0069644328
## single_epithelial_cell_size 0.1541555654 0.0014501316 1.310937e-02 0.0548529161
## bare_nuclei 0.0059202432 0.1120413440 4.776386e-02 0.0041824513
## bland_chromatin 0.0197103343 0.0269695263 1.444294e-01 0.0383907944
## normal_nucleoli 0.1811596241 0.0001397787 6.223678e-02 0.0014282478
## mitoses 0.0041958698 0.0066431792 5.138770e-03 0.0007473219
## Dim.9
## clump_thickness 6.671534e-07
## uniformity_cell_size 4.751467e-02
## uniformity_cell_shape 3.937743e-02
## marginal_adhesion 1.871751e-04
## single_epithelial_cell_size 3.954579e-04
## bare_nuclei 5.173799e-04
## bland_chromatin 3.423920e-04
## normal_nucleoli 4.308405e-05
## mitoses 4.966388e-06
library("corrplot")
## corrplot 0.84 loaded
# Plot the cos2 matrix as a grid; is.corr = FALSE because the input is not a
# correlation matrix.
corrplot(corr = res.bcw.pca$cos2, is.corr = FALSE)

# Contributions of variables to PC1 and PC2 taken together (top 9 shown)
fviz_contrib(X = bcw.after.pca, choice = "var", top = 9, axes = 1:2)

# Contributions of variables to PC1 only
fviz_contrib(X = bcw.after.pca, choice = "var", top = 9, axes = 1)

# Contributions of variables to PC2 only
fviz_contrib(X = bcw.after.pca, choice = "var", top = 9, axes = 2)

# Print a heading, then the rotation matrix (variable loadings on each PC)
print(x = "Principal components:")
## [1] "Principal components:"
print(x = bcw.after.pca[["rotation"]])
## PC1 PC2 PC3 PC4
## clump_thickness -0.3020626 -0.14080053 0.866372452 -0.10782844
## uniformity_cell_size -0.3807930 -0.04664031 -0.019937801 0.20425540
## uniformity_cell_shape -0.3775825 -0.08242247 0.033510871 0.17586560
## marginal_adhesion -0.3327236 -0.05209438 -0.412647341 -0.49317257
## single_epithelial_cell_size -0.3362340 0.16440439 -0.087742529 0.42738358
## bare_nuclei -0.3350675 -0.26126062 0.000691478 -0.49861767
## bland_chromatin -0.3457474 -0.22807676 -0.213071845 -0.01304734
## normal_nucleoli -0.3355914 0.03396582 -0.134248356 0.41711347
## mitoses -0.2302064 0.90555729 0.080492170 -0.25898781
## PC5 PC6 PC7 PC8
## clump_thickness 0.08032124 -0.24251752 -0.008515668 0.24770729
## uniformity_cell_size -0.14565287 -0.13903168 -0.205434260 -0.43629981
## uniformity_cell_shape -0.10839155 -0.07452713 -0.127209198 -0.58272674
## marginal_adhesion -0.01956898 -0.65462877 0.123830400 0.16343403
## single_epithelial_cell_size -0.63669325 0.06930891 0.211018210 0.45866910
## bare_nuclei -0.12477294 0.60922054 0.402790095 -0.12665288
## bland_chromatin 0.22766572 0.29889733 -0.700417365 0.38371888
## normal_nucleoli 0.69021015 0.02151820 0.459782742 0.07401187
## mitoses 0.10504168 0.14834515 -0.132116994 -0.05353693
## PC9
## clump_thickness -0.002747438
## uniformity_cell_size -0.733210938
## uniformity_cell_shape 0.667480798
## marginal_adhesion 0.046019211
## single_epithelial_cell_size 0.066890623
## bare_nuclei -0.076510293
## bland_chromatin 0.062241047
## normal_nucleoli -0.022078692
## mitoses 0.007496101
summary(bcw.after.pca) # importance of components: standard deviation, proportion and cumulative proportion of variance per PC
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4289 0.88088 0.73434 0.67796 0.61667 0.54943 0.54259
## Proportion of Variance 0.6555 0.08622 0.05992 0.05107 0.04225 0.03354 0.03271
## Cumulative Proportion 0.6555 0.74172 0.80163 0.85270 0.89496 0.92850 0.96121
## PC8 PC9
## Standard deviation 0.51062 0.29729
## Proportion of Variance 0.02897 0.00982
## Cumulative Proportion 0.99018 1.00000
# We analyse the amount of variance explained by subsequent principal components.
# Share of total variance carried by each component (squared sdev over their sum).
pca.var.share <- bcw.after.pca$sdev^2 / sum(bcw.after.pca$sdev^2)
variance <- round(pca.var.share, 4)
# Accumulate the unrounded shares and round once at the end, so that rounding
# error does not build up from one component to the next.
cumulative.variance <- round(cumsum(pca.var.share), 4)
# One row per component; labels are generated from the number of components
# instead of being hard-coded for exactly nine.
pca.df.var <- data.frame(PCs = paste0("Dim_", seq_along(variance)),
                         Variance = variance, Cummulative_Variance = cumulative.variance)
ggplot(pca.df.var, aes(x = PCs, y = Variance, fill = PCs)) + geom_bar(stat = "identity") +
  geom_text(aes(label = Variance), vjust = -0.3, size = 3.5) # Variance explained by each PC

ggplot(pca.df.var, aes(x = PCs, y = Cummulative_Variance, fill = PCs)) + geom_bar(stat = "identity") +
  geom_text(aes(label = Cummulative_Variance), vjust = -0.3, size = 3.5) # Cumulative variance explained

# Keep the scores on the first three principal components as a reduced feature set.
# The columns get auto-generated names (bcw.after.pca.x...1. etc.), which the
# later plot3d() call on pca.features refers to.
pca.features<-data.frame(bcw.after.pca$x[,1], bcw.after.pca$x[,2], bcw.after.pca$x[,3])
# NOTE(review): this renames the columns of `bcw1`, not of `pca.features`.
# `bcw1` is not defined in the visible part of the script -- confirm it exists.
# If `pca.features` was the intended target, the pca.features$bcw.after.pca.x...N.
# references further down would have to be updated at the same time.
colnames(bcw1)<-c("PC1","PC2","PC3")
## K-MEANS
k <- 2 # Number of clusters to partition the observations into
### FOR ALL FEATURES
# Run k-means on the full feature set with 10 random starts.
# `centers = k` (instead of a literal 2) keeps this call in step with the
# PCA-based k-means call, which already takes its cluster count from `k`.
kmeans.k2.10x <- kmeans(bcw.features, centers = k, iter.max = 10, nstart = 10)
bcw.kmeans.labels <- kmeans.k2.10x$cluster
# Plot the features, coloured by assigned cluster
plot(bcw.features, col = bcw.kmeans.labels)
title('K-Means Clustering for Breast Cancer Problem (10 random initialization)')

# Silhouette widths of the k-means partition, from distances between rows of bcw.features
bcw.sil.kmeans <- silhouette(bcw.kmeans.labels, dist(bcw.features))
fviz_silhouette(bcw.sil.kmeans, xlab = "K-means") # silhouette information
## cluster size ave.sil.width
## 1 1 453 0.76
## 2 2 230 0.28

# Visualization of cluster analysis results (3D scatterplot).
# All three axes are read from bcw.features, the data that was clustered, so
# the coordinates always line up with the cluster labels used for colouring.
# NOTE(review): `pch` is kept from the original call; it may have no visible
# effect on sphere plots (type = 's') -- confirm.
plot3d(bcw.features$uniformity_cell_size, bcw.features$uniformity_cell_shape,
       bcw.features$clump_thickness, col = bcw.kmeans.labels,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = 's',
       xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
       zlab = "clump thickness")
# Colour encodes k-means cluster membership, so the legend has one entry per
# cluster id with that cluster's colour. The per-observation pch/col vectors
# passed before did not correspond to the two fixed "malignant"/"benign"
# labels, and k-means cluster numbers carry no class meaning by themselves.
bcw.kmeans.ids <- sort(unique(bcw.kmeans.labels))
legend3d("topright", legend = paste("Cluster", bcw.kmeans.ids), pch = 16,
         col = bcw.kmeans.ids, cex = 1, inset = c(0.02))
### FOR SELECTED PCA FEATURES
# k-means on the three retained principal-component scores, 10 random starts
kmeans.k2.10xpca <- kmeans(x = pca.features, centers = k, nstart = 10, iter.max = 10)
pca.kmeans.labels <- kmeans.k2.10xpca[["cluster"]]
# Plot the PCA features, coloured by assigned cluster
plot(pca.features, col = pca.kmeans.labels)
title(main = "K-Means Clustering for Breast Cancer Problem PCA (10 random initialization)")

# Silhouette widths of this partition, from distances between rows of pca.features
pca.sil.kmeans <- silhouette(x = pca.kmeans.labels, dist = dist(pca.features))
fviz_silhouette(pca.sil.kmeans, xlab = "K-means") # silhouette information
## cluster size ave.sil.width
## 1 1 232 0.41
## 2 2 451 0.80

# 3D scatterplot of the three retained principal-component scores, coloured by
# k-means cluster.
# NOTE(review): `pch` is kept from the original call; it may have no visible
# effect on sphere plots (type = 's') -- confirm.
plot3d(pca.features$bcw.after.pca.x...1., pca.features$bcw.after.pca.x...2.,
       pca.features$bcw.after.pca.x...3., col = pca.kmeans.labels,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = 's',
       xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# Colour encodes k-means cluster membership, so the legend has one entry per
# cluster id with that cluster's colour. The per-observation pch/col vectors
# passed before did not correspond to the two fixed "malignant"/"benign"
# labels, and k-means cluster numbers carry no class meaning by themselves.
pca.kmeans.ids <- sort(unique(pca.kmeans.labels))
legend3d("topright", legend = paste("Cluster", pca.kmeans.ids), pch = 16,
         col = pca.kmeans.ids, cex = 1, inset = c(0.02))
## PAM
library(cluster)
### Application of PAM algorithm FOR ALL FEATURES
# Partition the full feature set around 2 medoids
bcw.pam2 <- pam(bcw.features, k = 2)
# Open a new graphics device, then draw the default plots for the PAM fit
# (note: plot() works differently for quantitative and mixed data types)
X11()
plot(x = bcw.pam2)


# Full summary: medoids, clustering vector, objective, per-cluster and silhouette info
print(summary(bcw.pam2))
## Medoids:
## ID clump_thickness uniformity_cell_size uniformity_cell_shape
## 23 23 3 1 1
## 468 453 6 6 6
## marginal_adhesion single_epithelial_cell_size bare_nuclei bland_chromatin
## 23 1 2 1 2
## 468 5 4 10 7
## normal_nucleoli mitoses
## 23 1 1
## 468 6 2
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 2 1 2 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1
## 21 22 23 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 42
## 2 2 1 1 2 1 1 1 1 1 1 2 1 1 1 2 1 2 2 2
## 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
## 2 1 2 1 2 1 1 2 1 1 2 2 2 2 2 1 2 1 2 1
## 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
## 2 1 1 1 1 2 2 1 1 2 1 2 1 1 1 1 1 1 1 1
## 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
## 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1
## 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
## 1 1 2 1 2 2 1 2 1 2 2 2 1 1 1 2 1 1 1 1
## 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 141 142 143
## 2 2 2 1 2 1 2 1 1 1 2 1 1 1 1 1 1 1 1 2
## 144 145 147 148 149 150 151 152 153 154 155 156 157 158 160 161 162 163 164 166
## 1 1 2 1 1 2 1 2 2 1 1 2 1 1 2 2 1 1 1 1
## 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
## 2 2 1 1 1 1 1 2 2 2 1 2 1 2 1 1 1 2 2 1
## 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
## 2 2 2 1 2 2 1 1 1 1 2 1 1 1 2 2 1 1 1 2
## 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
## 2 1 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2 2 1
## 227 228 229 230 231 232 233 234 235 237 238 239 240 241 242 243 244 245 246 247
## 2 2 1 2 2 2 1 2 1 2 2 2 2 1 1 1 1 1 1 2
## 248 249 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
## 2 1 1 2 2 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2
## 269 270 271 272 273 274 275 277 278 279 280 281 282 283 284 285 286 287 288 289
## 2 1 2 1 2 1 1 1 1 1 2 1 1 2 2 2 2 2 1 2
## 290 291 292 294 296 297 299 300 301 302 303 304 305 306 307 308 309 310 311 312
## 2 1 1 2 2 2 1 2 2 1 2 1 2 2 1 1 2 1 1 1
## 313 314 315 317 318 319 320 321 323 324 325 326 327 328 329 330 331 332 333 334
## 2 1 1 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 2
## 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
## 2 1 2 1 1 2 2 1 1 1 2 1 1 1 1 2 1 1 2 2
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
## 1 1 1 2 2 2 2 2 1 1 1 1 2 2 1 1 1 1 1 1
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
## 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 1 1
## 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 413 414 415
## 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2
## 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
## 1 2 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
## 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1
## 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
## 1 2 2 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
## 1 1 1 1 2 1 1 2 2 1 1 1 2 2 1 1 2 1 2 1
## 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
## 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2
## 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535
## 2 1 1 1 2 1 1 2 2 1 1 1 1 1 1 2 1 1 1 1
## 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555
## 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1
## 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
## 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 2 1 1 2
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
## 1 1 1 1 1 1 2 2 1 1 1 2 1 2 1 2 2 2 1 2
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
## 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 2 2 2 1 1
## 616 617 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656
## 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
## 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
## 1 1 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1
## 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
## 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## 697 698 699
## 2 2 2
## Objective function:
## build swap
## 4.949396 4.522860
##
## Numerical information per cluster:
## size max_diss av_diss diameter separation
## [1,] 454 12.24745 2.613916 15.55635 3.605551
## [2,] 229 15.84298 8.307403 23.19483 3.605551
##
## Isolated clusters:
## L-clusters: character(0)
## L*-clusters: character(0)
##
## Silhouette plot information:
## cluster neighbor sil_width
## 23 1 2 0.8390551922
## 200 1 2 0.8390551922
## 258 1 2 0.8390551922
## 277 1 2 0.8390551922
## 393 1 2 0.8390551922
## 396 1 2 0.8390551922
## 525 1 2 0.8390551922
## 534 1 2 0.8390551922
## 551 1 2 0.8390551922
## 564 1 2 0.8390551922
## 573 1 2 0.8390551922
## 599 1 2 0.8390551922
## 601 1 2 0.8390551922
## 617 1 2 0.8390551922
## 621 1 2 0.8390551922
## 642 1 2 0.8390551922
## 643 1 2 0.8390551922
## 646 1 2 0.8390551922
## 650 1 2 0.8390551922
## 656 1 2 0.8390551922
## 12 1 2 0.8344997928
## 29 1 2 0.8344997928
## 36 1 2 0.8344997928
## 145 1 2 0.8344997928
## 366 1 2 0.8344997928
## 430 1 2 0.8344997928
## 535 1 2 0.8344997928
## 559 1 2 0.8344997928
## 138 1 2 0.8285402572
## 141 1 2 0.8285402572
## 171 1 2 0.8285402572
## 257 1 2 0.8285402572
## 402 1 2 0.8285402572
## 425 1 2 0.8285402572
## 476 1 2 0.8285402572
## 542 1 2 0.8285402572
## 555 1 2 0.8285402572
## 584 1 2 0.8285402572
## 635 1 2 0.8285402572
## 693 1 2 0.8285402572
## 35 1 2 0.8274131361
## 375 1 2 0.8274131361
## 410 1 2 0.8274131361
## 31 1 2 0.8263562573
## 471 1 2 0.8263562573
## 487 1 2 0.8263562573
## 496 1 2 0.8263562573
## 514 1 2 0.8263562573
## 142 1 2 0.8251395634
## 343 1 2 0.8251395634
## 384 1 2 0.8251395634
## 385 1 2 0.8251395634
## 446 1 2 0.8251395634
## 510 1 2 0.8251395634
## 629 1 2 0.8251395634
## 645 1 2 0.8251395634
## 680 1 2 0.8251395634
## 696 1 2 0.8251395634
## 17 1 2 0.8240760882
## 137 1 2 0.8240760882
## 499 1 2 0.8240760882
## 500 1 2 0.8240760882
## 502 1 2 0.8240760882
## 539 1 2 0.8240760882
## 544 1 2 0.8240760882
## 603 1 2 0.8240760882
## 619 1 2 0.8240760882
## 654 1 2 0.8240760882
## 134 1 2 0.8239000236
## 694 1 2 0.8237987256
## 128 1 2 0.8236350612
## 163 1 2 0.8236350612
## 169 1 2 0.8236350612
## 195 1 2 0.8236350612
## 259 1 2 0.8236350612
## 281 1 2 0.8236350612
## 323 1 2 0.8236350612
## 397 1 2 0.8236350612
## 655 1 2 0.8236350612
## 668 1 2 0.8236350612
## 615 1 2 0.8228477812
## 135 1 2 0.8218503436
## 288 1 2 0.8218503436
## 32 1 2 0.8202696893
## 95 1 2 0.8202696893
## 132 1 2 0.8202696893
## 158 1 2 0.8202696893
## 177 1 2 0.8202696893
## 243 1 2 0.8202696893
## 352 1 2 0.8202696893
## 365 1 2 0.8202696893
## 673 1 2 0.8202696893
## 389 1 2 0.8200982348
## 80 1 2 0.8182128922
## 311 1 2 0.8182128922
## 92 1 2 0.8178668469
## 526 1 2 0.8178668469
## 549 1 2 0.8173836949
## 398 1 2 0.8148621037
## 438 1 2 0.8148621037
## 465 1 2 0.8148621037
## 469 1 2 0.8148621037
## 474 1 2 0.8148621037
## 478 1 2 0.8148621037
## 527 1 2 0.8148621037
## 630 1 2 0.8148621037
## 639 1 2 0.8148621037
## 689 1 2 0.8148621037
## 548 1 2 0.8145142250
## 48 1 2 0.8143620951
## 65 1 2 0.8143620951
## 94 1 2 0.8143620951
## 126 1 2 0.8143620951
## 173 1 2 0.8143620951
## 217 1 2 0.8143620951
## 226 1 2 0.8143620951
## 278 1 2 0.8143620951
## 328 1 2 0.8143620951
## 355 1 2 0.8143620951
## 377 1 2 0.8143620951
## 406 1 2 0.8143620951
## 408 1 2 0.8143620951
## 411 1 2 0.8143620951
## 418 1 2 0.8143620951
## 429 1 2 0.8143620951
## 574 1 2 0.8143620951
## 578 1 2 0.8143620951
## 579 1 2 0.8143620951
## 661 1 2 0.8143620951
## 675 1 2 0.8143620951
## 97 1 2 0.8142818875
## 27 1 2 0.8142136063
## 139 1 2 0.8138025307
## 373 1 2 0.8138025307
## 597 1 2 0.8138025307
## 567 1 2 0.8130306090
## 82 1 2 0.8129721906
## 503 1 2 0.8129721906
## 493 1 2 0.8124619951
## 530 1 2 0.8124619951
## 399 1 2 0.8116969049
## 10 1 2 0.8107293301
## 275 1 2 0.8099987251
## 186 1 2 0.8097913000
## 18 1 2 0.8095918698
## 67 1 2 0.8095918698
## 89 1 2 0.8095918698
## 93 1 2 0.8095918698
## 179 1 2 0.8095918698
## 196 1 2 0.8095918698
## 504 1 2 0.8095918698
## 528 1 2 0.8095918698
## 662 1 2 0.8095918698
## 8 1 2 0.8095046807
## 3 1 2 0.8091069187
## 34 1 2 0.8090805311
## 282 1 2 0.8090805311
## 519 1 2 0.8087325773
## 90 1 2 0.8081013813
## 155 1 2 0.8067158642
## 182 1 2 0.8067158642
## 199 1 2 0.8067158642
## 291 1 2 0.8067158642
## 312 1 2 0.8067158642
## 314 1 2 0.8067158642
## 336 1 2 0.8067158642
## 344 1 2 0.8067158642
## 346 1 2 0.8067158642
## 376 1 2 0.8067158642
## 381 1 2 0.8067158642
## 447 1 2 0.8067158642
## 491 1 2 0.8067158642
## 505 1 2 0.8067158642
## 511 1 2 0.8067158642
## 586 1 2 0.8067158642
## 608 1 2 0.8067158642
## 624 1 2 0.8067158642
## 633 1 2 0.8067158642
## 644 1 2 0.8067158642
## 660 1 2 0.8067158642
## 666 1 2 0.8067158642
## 679 1 2 0.8067158642
## 684 1 2 0.8067158642
## 685 1 2 0.8067158642
## 686 1 2 0.8067158642
## 687 1 2 0.8067158642
## 477 1 2 0.8051785275
## 607 1 2 0.8047710643
## 315 1 2 0.8038160459
## 339 1 2 0.8038160459
## 378 1 2 0.8038160459
## 518 1 2 0.8038160459
## 602 1 2 0.8038160459
## 470 1 2 0.8028755589
## 677 1 2 0.8028755589
## 498 1 2 0.8025373111
## 391 1 2 0.8022140868
## 407 1 2 0.8018007327
## 532 1 2 0.8018007327
## 464 1 2 0.8017561534
## 25 1 2 0.8012602883
## 91 1 2 0.8012602883
## 96 1 2 0.8012602883
## 172 1 2 0.8012602883
## 194 1 2 0.8012602883
## 203 1 2 0.8012602883
## 205 1 2 0.8012602883
## 213 1 2 0.8012602883
## 218 1 2 0.8012602883
## 245 1 2 0.8012602883
## 270 1 2 0.8012602883
## 279 1 2 0.8012602883
## 292 1 2 0.8012602883
## 302 1 2 0.8012602883
## 304 1 2 0.8012602883
## 307 1 2 0.8012602883
## 308 1 2 0.8012602883
## 325 1 2 0.8012602883
## 338 1 2 0.8012602883
## 342 1 2 0.8012602883
## 552 1 2 0.8012602883
## 563 1 2 0.8012602883
## 580 1 2 0.8012602883
## 695 1 2 0.8006947673
## 672 1 2 0.8006220476
## 46 1 2 0.8005757604
## 522 1 2 0.8002172325
## 103 1 2 0.7995998037
## 62 1 2 0.7993979217
## 120 1 2 0.7971523593
## 394 1 2 0.7966688410
## 449 1 2 0.7966688410
## 497 1 2 0.7966688410
## 517 1 2 0.7966688410
## 162 1 2 0.7965159875
## 565 1 2 0.7965159875
## 356 1 2 0.7958115096
## 665 1 2 0.7942891848
## 28 1 2 0.7925144388
## 193 1 2 0.7925144388
## 512 1 2 0.7925144388
## 546 1 2 0.7925144388
## 560 1 2 0.7925144388
## 577 1 2 0.7925144388
## 596 1 2 0.7925144388
## 620 1 2 0.7925144388
## 632 1 2 0.7925144388
## 657 1 2 0.7925144388
## 444 1 2 0.7924074874
## 11 1 2 0.7918241360
## 151 1 2 0.7918241360
## 181 1 2 0.7918241360
## 208 1 2 0.7918241360
## 209 1 2 0.7918241360
## 229 1 2 0.7918241360
## 533 1 2 0.7918241360
## 521 1 2 0.7915341710
## 157 1 2 0.7911682285
## 221 1 2 0.7895519142
## 688 1 2 0.7888511148
## 453 1 2 0.7879362316
## 170 1 2 0.7875074527
## 70 1 2 0.7873915660
## 614 1 2 0.7869217227
## 148 1 2 0.7857736636
## 440 1 2 0.7851289323
## 448 1 2 0.7851289323
## 452 1 2 0.7851289323
## 475 1 2 0.7851289323
## 509 1 2 0.7851289323
## 513 1 2 0.7851289323
## 590 1 2 0.7851289323
## 610 1 2 0.7851289323
## 678 1 2 0.7851289323
## 251 1 2 0.7847652452
## 647 1 2 0.7847652452
## 506 1 2 0.7835867687
## 122 1 2 0.7833660160
## 581 1 2 0.7830482226
## 166 1 2 0.7828997532
## 545 1 2 0.7828376538
## 616 1 2 0.7826406141
## 374 1 2 0.7823768565
## 121 1 2 0.7803012321
## 433 1 2 0.7797233575
## 588 1 2 0.7797233575
## 653 1 2 0.7797233575
## 383 1 2 0.7796288556
## 1 1 2 0.7790344139
## 98 1 2 0.7790344139
## 204 1 2 0.7790344139
## 272 1 2 0.7790344139
## 537 1 2 0.7790344139
## 561 1 2 0.7790344139
## 562 1 2 0.7790344139
## 541 1 2 0.7780535845
## 481 1 2 0.7765085201
## 459 1 2 0.7763799360
## 485 1 2 0.7763799360
## 594 1 2 0.7763799360
## 641 1 2 0.7760597639
## 242 1 2 0.7745577575
## 451 1 2 0.7745533514
## 79 1 2 0.7743175694
## 625 1 2 0.7741984806
## 663 1 2 0.7739261368
## 664 1 2 0.7739261368
## 568 1 2 0.7736998033
## 479 1 2 0.7735884123
## 210 1 2 0.7704750832
## 414 1 2 0.7699108502
## 538 1 2 0.7699108502
## 576 1 2 0.7699108502
## 326 1 2 0.7696471565
## 434 1 2 0.7694800715
## 455 1 2 0.7694264675
## 5 1 2 0.7691699155
## 49 1 2 0.7691699155
## 30 1 2 0.7686860780
## 369 1 2 0.7686860780
## 372 1 2 0.7686860780
## 109 1 2 0.7685202756
## 648 1 2 0.7680359259
## 691 1 2 0.7680359259
## 154 1 2 0.7679355301
## 332 1 2 0.7676196798
## 370 1 2 0.7674839654
## 83 1 2 0.7669857062
## 683 1 2 0.7666924047
## 395 1 2 0.7636108715
## 652 1 2 0.7628229470
## 400 1 2 0.7587684182
## 431 1 2 0.7587322264
## 348 1 2 0.7580490391
## 371 1 2 0.7566993282
## 246 1 2 0.7556003593
## 557 1 2 0.7553287460
## 14 1 2 0.7552176230
## 71 1 2 0.7550090852
## 131 1 2 0.7550090852
## 424 1 2 0.7550090852
## 536 1 2 0.7534069633
## 390 1 2 0.7531617435
## 190 1 2 0.7517601838
## 333 1 2 0.7505139978
## 351 1 2 0.7499628244
## 461 1 2 0.7499628244
## 558 1 2 0.7499628244
## 640 1 2 0.7499628244
## 460 1 2 0.7498495085
## 405 1 2 0.7480845163
## 553 1 2 0.7477065579
## 667 1 2 0.7472194854
## 540 1 2 0.7469751555
## 636 1 2 0.7461065101
## 543 1 2 0.7448085055
## 409 1 2 0.7445651252
## 439 1 2 0.7424203592
## 598 1 2 0.7423000292
## 78 1 2 0.7408485883
## 76 1 2 0.7368557424
## 473 1 2 0.7366477327
## 20 1 2 0.7351750538
## 183 1 2 0.7351750538
## 501 1 2 0.7351750538
## 404 1 2 0.7351651439
## 676 1 2 0.7348945654
## 77 1 2 0.7336494643
## 443 1 2 0.7302904634
## 379 1 2 0.7287042809
## 486 1 2 0.7261700122
## 674 1 2 0.7255749391
## 119 1 2 0.7255404813
## 363 1 2 0.7244947371
## 115 1 2 0.7244796509
## 136 1 2 0.7232359719
## 508 1 2 0.7210223294
## 421 1 2 0.7171831028
## 319 1 2 0.7163152958
## 403 1 2 0.7160726252
## 651 1 2 0.7157875924
## 419 1 2 0.7125825401
## 463 1 2 0.7114787845
## 472 1 2 0.7114787845
## 386 1 2 0.7110380623
## 347 1 2 0.7051602240
## 420 1 2 0.7045659600
## 529 1 2 0.7028462927
## 198 1 2 0.7022021781
## 220 1 2 0.7021271296
## 631 1 2 0.7007505904
## 9 1 2 0.6996110270
## 600 1 2 0.6970828208
## 432 1 2 0.6917445746
## 626 1 2 0.6915947007
## 266 1 2 0.6904042115
## 423 1 2 0.6903662693
## 628 1 2 0.6825087532
## 388 1 2 0.6821673387
## 462 1 2 0.6816306285
## 638 1 2 0.6807388546
## 116 1 2 0.6727188095
## 144 1 2 0.6727188095
## 482 1 2 0.6679658281
## 241 1 2 0.6666348748
## 623 1 2 0.6568482211
## 223 1 2 0.6373510759
## 111 1 2 0.6316945641
## 380 1 2 0.6288998893
## 84 1 2 0.6241824633
## 81 1 2 0.6192492761
## 249 1 2 0.6135338346
## 310 1 2 0.6122257742
## 244 1 2 0.6088993130
## 442 1 2 0.6088420068
## 364 1 2 0.6075058340
## 585 1 2 0.6069547155
## 445 1 2 0.6052732905
## 427 1 2 0.5789697884
## 554 1 2 0.5766496142
## 299 1 2 0.5743399582
## 357 1 2 0.5724670145
## 164 1 2 0.5699115088
## 235 1 2 0.5697604958
## 38 1 2 0.5676024711
## 690 1 2 0.5628224207
## 490 1 2 0.5612663453
## 73 1 2 0.5526639090
## 117 1 2 0.5340430997
## 13 1 2 0.5067838408
## 130 1 2 0.4976031877
## 622 1 2 0.4922247894
## 274 1 2 0.4630476891
## 52 1 2 0.4499555767
## 556 1 2 0.4483924203
## 416 1 2 0.4159922560
## 102 1 2 0.3919721453
## 7 1 2 0.3742831460
## 456 1 2 0.3546045557
## 58 1 2 0.3400371816
## 104 1 2 0.3211844361
## 149 1 2 0.3067586017
## 233 1 2 0.3003040397
## 106 1 2 0.2908166855
## 60 1 2 0.2900669210
## 495 1 2 0.2549651525
## 658 1 2 0.2516824905
## 16 1 2 0.2265575425
## 349 1 2 0.2176979031
## 64 1 2 0.2163982233
## 66 1 2 0.1896334874
## 51 1 2 0.1742587228
## 75 1 2 0.1732895190
## 44 1 2 0.1315833529
## 6 2 1 0.4960761035
## 247 2 1 0.4958519160
## 214 2 1 0.4871184751
## 286 2 1 0.4851962770
## 201 2 1 0.4844903702
## 191 2 1 0.4794606276
## 587 2 1 0.4773440370
## 211 2 1 0.4754998299
## 263 2 1 0.4744907891
## 184 2 1 0.4728016369
## 212 2 1 0.4717710413
## 206 2 1 0.4708111578
## 484 2 1 0.4706205926
## 318 2 1 0.4697905984
## 450 2 1 0.4680337564
## 368 2 1 0.4670068624
## 426 2 1 0.4636678153
## 547 2 1 0.4616694650
## 232 2 1 0.4594325679
## 367 2 1 0.4589742642
## 681 2 1 0.4585138828
## 174 2 1 0.4549243491
## 160 2 1 0.4508051863
## 215 2 1 0.4504349240
## 494 2 1 0.4488277682
## 566 2 1 0.4464735260
## 161 2 1 0.4436891955
## 262 2 1 0.4422296346
## 230 2 1 0.4411727500
## 100 2 1 0.4407048896
## 392 2 1 0.4401748296
## 457 2 1 0.4390277126
## 572 2 1 0.4385866627
## 682 2 1 0.4378322053
## 582 2 1 0.4327401252
## 515 2 1 0.4325899231
## 150 2 1 0.4310016295
## 303 2 1 0.4294055955
## 613 2 1 0.4272292149
## 176 2 1 0.4268738930
## 227 2 1 0.4222060708
## 422 2 1 0.4208325245
## 192 2 1 0.4201021501
## 133 2 1 0.4195561402
## 466 2 1 0.4171467904
## 361 2 1 0.4155515529
## 153 2 1 0.4153600799
## 15 2 1 0.4149794259
## 583 2 1 0.4145942615
## 358 2 1 0.4103599290
## 488 2 1 0.4100411963
## 468 2 1 0.4096791116
## 290 2 1 0.4094617364
## 225 2 1 0.4081392720
## 606 2 1 0.4080698552
## 287 2 1 0.4055278571
## 167 2 1 0.4044009339
## 670 2 1 0.4043012062
## 480 2 1 0.4032472843
## 345 2 1 0.3968773016
## 216 2 1 0.3964967624
## 507 2 1 0.3947557653
## 239 2 1 0.3942934915
## 382 2 1 0.3930134002
## 189 2 1 0.3917837591
## 659 2 1 0.3914682243
## 219 2 1 0.3914516037
## 413 2 1 0.3878176325
## 354 2 1 0.3873380195
## 296 2 1 0.3865788274
## 483 2 1 0.3839103413
## 417 2 1 0.3837896076
## 43 2 1 0.3818288312
## 254 2 1 0.3818288312
## 74 2 1 0.3797396095
## 467 2 1 0.3794464252
## 516 2 1 0.3793906332
## 454 2 1 0.3777801683
## 123 2 1 0.3777629924
## 228 2 1 0.3775055560
## 188 2 1 0.3772836058
## 222 2 1 0.3771826149
## 88 2 1 0.3758750634
## 609 2 1 0.3746904970
## 570 2 1 0.3738336542
## 458 2 1 0.3716934430
## 85 2 1 0.3715200275
## 202 2 1 0.3655947737
## 306 2 1 0.3614087383
## 671 2 1 0.3580627537
## 125 2 1 0.3537949980
## 237 2 1 0.3526395552
## 264 2 1 0.3487057651
## 401 2 1 0.3484729450
## 114 2 1 0.3480297122
## 571 2 1 0.3440854257
## 37 2 1 0.3426006764
## 107 2 1 0.3421786259
## 22 2 1 0.3420583297
## 108 2 1 0.3406937852
## 637 2 1 0.3390415474
## 524 2 1 0.3359352440
## 105 2 1 0.3306408672
## 127 2 1 0.3282526836
## 697 2 1 0.3251777881
## 69 2 1 0.3249541032
## 309 2 1 0.3246506916
## 19 2 1 0.3241334721
## 261 2 1 0.3225371319
## 267 2 1 0.3223442294
## 45 2 1 0.3194861481
## 50 2 1 0.3173393269
## 649 2 1 0.3158703281
## 627 2 1 0.3120531692
## 207 2 1 0.3096030163
## 33 2 1 0.3083532169
## 118 2 1 0.3070423386
## 110 2 1 0.3067425617
## 415 2 1 0.3064437961
## 592 2 1 0.3047770748
## 285 2 1 0.3042060148
## 301 2 1 0.3010835141
## 47 2 1 0.3001594879
## 63 2 1 0.2997635006
## 255 2 1 0.2997635006
## 187 2 1 0.2985333101
## 550 2 1 0.2979963631
## 531 2 1 0.2950003497
## 359 2 1 0.2942822990
## 612 2 1 0.2933310500
## 238 2 1 0.2924991060
## 492 2 1 0.2922749378
## 197 2 1 0.2914233556
## 57 2 1 0.2891969522
## 435 2 1 0.2879614698
## 54 2 1 0.2849553401
## 335 2 1 0.2794149533
## 55 2 1 0.2750758164
## 337 2 1 0.2744386384
## 185 2 1 0.2723100234
## 252 2 1 0.2715845796
## 269 2 1 0.2698819951
## 231 2 1 0.2672291110
## 436 2 1 0.2664600791
## 692 2 1 0.2654748873
## 699 2 1 0.2641902996
## 330 2 1 0.2625774494
## 441 2 1 0.2604459474
## 99 2 1 0.2600101335
## 520 2 1 0.2540254246
## 39 2 1 0.2535881552
## 271 2 1 0.2521274013
## 113 2 1 0.2481645580
## 362 2 1 0.2460810054
## 178 2 1 0.2456281664
## 575 2 1 0.2382929076
## 611 2 1 0.2378529187
## 321 2 1 0.2378296289
## 21 2 1 0.2318342039
## 224 2 1 0.2306488709
## 256 2 1 0.2247062299
## 175 2 1 0.2241981279
## 340 2 1 0.2194158266
## 265 2 1 0.2187400022
## 300 2 1 0.2185593231
## 595 2 1 0.2160482242
## 334 2 1 0.2149052387
## 72 2 1 0.2117769835
## 86 2 1 0.2108784254
## 234 2 1 0.2088469958
## 387 2 1 0.2001733981
## 589 2 1 0.1966227119
## 634 2 1 0.1913866937
## 698 2 1 0.1910561256
## 305 2 1 0.1868249986
## 129 2 1 0.1860810986
## 428 2 1 0.1851823984
## 329 2 1 0.1787405996
## 68 2 1 0.1771361253
## 437 2 1 0.1725212278
## 324 2 1 0.1665398577
## 283 2 1 0.1661609528
## 56 2 1 0.1660328368
## 168 2 1 0.1643868700
## 284 2 1 0.1587159591
## 341 2 1 0.1525545585
## 294 2 1 0.1523949001
## 101 2 1 0.1518242244
## 280 2 1 0.1508421486
## 53 2 1 0.1408862250
## 260 2 1 0.1404192016
## 604 2 1 0.1342964098
## 605 2 1 0.1301816839
## 240 2 1 0.1275360990
## 569 2 1 0.1254744904
## 317 2 1 0.1231040495
## 593 2 1 0.1117087938
## 2 2 1 0.1111314047
## 360 2 1 0.1082820016
## 331 2 1 0.1024349876
## 313 2 1 0.1023000709
## 253 2 1 0.0993707001
## 4 2 1 0.0948486101
## 156 2 1 0.0946531873
## 124 2 1 0.0920288429
## 112 2 1 0.0893270551
## 152 2 1 0.0881158561
## 591 2 1 0.0764686428
## 523 2 1 0.0710359246
## 669 2 1 0.0362279926
## 143 2 1 0.0356336431
## 350 2 1 0.0260346776
## 87 2 1 0.0052381442
## 40 2 1 -0.0008144982
## 61 2 1 -0.0323805276
## 268 2 1 -0.0547775967
## 273 2 1 -0.0547775967
## 248 2 1 -0.0617937638
## 327 2 1 -0.0707318488
## 180 2 1 -0.0874725524
## 489 2 1 -0.0921716413
## 320 2 1 -0.1062849245
## 289 2 1 -0.1171822935
## 59 2 1 -0.1227622604
## 297 2 1 -0.1514838984
## 147 2 1 -0.1520148546
## 42 2 1 -0.1619599324
## 353 2 1 -0.1889455512
## 26 2 1 -0.2035022665
## Average silhouette width per cluster:
## [1] 0.7550140 0.2828213
## Average silhouette width of total data set:
## [1] 0.5966946
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
# Cluster assignment of each observation from the PAM fit on all features
bcw.cluster.labels <- bcw.pam2[["clustering"]]
# Silhouette widths of that partition, from distances between rows of bcw.features
bcw.sil.pam2 <- silhouette(x = bcw.cluster.labels, dist = dist(bcw.features))
fviz_silhouette(bcw.sil.pam2, xlab = "PAM") # silhouette information
## cluster size ave.sil.width
## 1 1 454 0.76
## 2 2 229 0.28

# Visualization of cluster analysis results (3D scatterplot).
# All three axes are read from bcw.features, the data that was clustered, so
# the coordinates always line up with the cluster labels used for colouring.
# NOTE(review): `pch` is kept from the original call; it may have no visible
# effect on sphere plots (type = 's') -- confirm.
plot3d(bcw.features$uniformity_cell_size, bcw.features$uniformity_cell_shape,
       bcw.features$clump_thickness, col = bcw.cluster.labels,
       pch = as.numeric(bcw.real.class.labels), size = 1, type = 's',
       xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
       zlab = "clump thickness")
# Colour encodes PAM cluster membership, so the legend has one entry per
# cluster id with that cluster's colour. The per-observation pch/col vectors
# passed before did not correspond to the two fixed "malignant"/"benign" labels.
bcw.pam.ids <- sort(unique(bcw.cluster.labels))
legend3d("topright", legend = paste("Cluster", bcw.pam.ids), pch = 16,
         col = bcw.pam.ids, cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')
### Application of PAM algorithm FOR PCA FEATURES
# Partition the three retained principal-component scores around 2 medoids
pca.pam2 <- pam(pca.features, k = 2)
#X11()
#plot(pca.pam2) # default visualization (note: plot() works differently for quantitative and mixed data types)
# Full summary of the PAM fit on the PCA features
print(summary(pca.pam2))
## Medoids:
## ID bcw.after.pca.x...1. bcw.after.pca.x...2. bcw.after.pca.x...3.
## 134 132 1.714471 0.09986549 -0.006010226
## 659 643 -3.115058 -0.50006703 0.011453841
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 2 1 2 1 2 1 1 1 1 1 1 1 1 2 2 1 1 2 1
## 21 22 23 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 42
## 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 2 2
## 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
## 2 2 2 1 2 1 1 2 2 1 2 2 2 2 2 1 1 1 2 1
## 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
## 2 1 1 2 1 2 2 1 1 2 1 2 2 1 1 1 1 1 1 1
## 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
## 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1
## 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
## 1 1 2 1 2 2 1 2 1 2 2 2 1 1 1 2 1 1 1 1
## 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 141 142 143
## 2 2 2 1 2 1 2 1 1 1 2 1 1 1 1 1 1 1 1 2
## 144 145 147 148 149 150 151 152 153 154 155 156 157 158 160 161 162 163 164 166
## 1 1 2 1 1 2 1 2 2 1 1 2 1 1 2 2 1 1 1 1
## 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
## 2 2 1 1 1 1 1 2 2 2 1 2 1 2 1 1 1 2 2 1
## 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
## 2 2 2 1 2 2 1 1 1 1 2 1 1 1 2 2 1 1 1 2
## 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
## 2 1 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2 2 1
## 227 228 229 230 231 232 233 234 235 237 238 239 240 241 242 243 244 245 246 247
## 2 2 1 2 2 2 1 2 1 2 2 2 2 1 1 1 1 1 1 2
## 248 249 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
## 1 1 1 2 2 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2
## 269 270 271 272 273 274 275 277 278 279 280 281 282 283 284 285 286 287 288 289
## 2 1 2 1 2 1 1 1 1 1 2 1 1 2 2 2 2 2 1 2
## 290 291 292 294 296 297 299 300 301 302 303 304 305 306 307 308 309 310 311 312
## 2 1 1 2 2 2 1 2 2 1 2 1 2 2 1 1 2 1 1 1
## 313 314 315 317 318 319 320 321 323 324 325 326 327 328 329 330 331 332 333 334
## 2 1 1 2 2 1 2 2 1 2 1 1 1 1 2 2 2 1 1 2
## 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
## 2 1 2 1 1 2 2 1 1 1 2 1 1 1 2 2 1 1 2 2
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
## 1 1 1 2 2 2 2 2 1 1 1 1 2 2 1 1 1 1 1 1
## 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
## 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 2 1 1
## 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 413 414 415
## 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2
## 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
## 1 2 1 1 1 1 2 1 1 1 2 1 2 1 1 1 1 1 1 2
## 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
## 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1
## 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
## 1 2 2 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1
## 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
## 1 1 1 1 2 1 1 2 2 1 1 1 2 2 1 1 2 1 2 1
## 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
## 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2
## 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535
## 2 1 1 1 2 1 1 2 2 1 1 1 1 1 1 2 1 1 1 1
## 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555
## 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1
## 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
## 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 2 1 1 2
## 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
## 1 1 1 1 1 1 2 2 1 1 1 2 1 2 1 2 2 2 1 2
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
## 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 2 2 2 1 1
## 616 617 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1
## 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656
## 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
## 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
## 1 2 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1
## 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
## 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## 697 698 699
## 2 2 2
## Objective function:
## build swap
## 1.199631 1.118681
##
## Numerical information per cluster:
## size max_diss av_diss diameter separation
## [1,] 451 3.871514 0.6667585 5.732047 0.1954325
## [2,] 232 5.759795 1.9972022 8.103404 0.1954325
##
## Isolated clusters:
## L-clusters: character(0)
## L*-clusters: character(0)
##
## Silhouette plot information:
## cluster neighbor sil_width
## 23 1 2 0.8700179613
## 200 1 2 0.8700179613
## 258 1 2 0.8700179613
## 277 1 2 0.8700179613
## 393 1 2 0.8700179613
## 396 1 2 0.8700179613
## 525 1 2 0.8700179613
## 534 1 2 0.8700179613
## 551 1 2 0.8700179613
## 564 1 2 0.8700179613
## 573 1 2 0.8700179613
## 599 1 2 0.8700179613
## 601 1 2 0.8700179613
## 617 1 2 0.8700179613
## 621 1 2 0.8700179613
## 642 1 2 0.8700179613
## 643 1 2 0.8700179613
## 646 1 2 0.8700179613
## 650 1 2 0.8700179613
## 656 1 2 0.8700179613
## 134 1 2 0.8695091890
## 519 1 2 0.8691071880
## 92 1 2 0.8687821986
## 526 1 2 0.8687821986
## 35 1 2 0.8685340229
## 375 1 2 0.8685340229
## 410 1 2 0.8685340229
## 128 1 2 0.8674225071
## 163 1 2 0.8674225071
## 169 1 2 0.8674225071
## 195 1 2 0.8674225071
## 259 1 2 0.8674225071
## 281 1 2 0.8674225071
## 323 1 2 0.8674225071
## 397 1 2 0.8674225071
## 655 1 2 0.8674225071
## 668 1 2 0.8674225071
## 453 1 2 0.8673245999
## 27 1 2 0.8666938637
## 135 1 2 0.8666882536
## 288 1 2 0.8666882536
## 506 1 2 0.8660268999
## 695 1 2 0.8657641411
## 688 1 2 0.8643841136
## 12 1 2 0.8643156960
## 29 1 2 0.8643156960
## 36 1 2 0.8643156960
## 145 1 2 0.8643156960
## 366 1 2 0.8643156960
## 430 1 2 0.8643156960
## 535 1 2 0.8643156960
## 559 1 2 0.8643156960
## 404 1 2 0.8640405720
## 399 1 2 0.8639400482
## 389 1 2 0.8633921237
## 641 1 2 0.8630522732
## 628 1 2 0.8627805615
## 31 1 2 0.8616963438
## 471 1 2 0.8616963438
## 487 1 2 0.8616963438
## 496 1 2 0.8616963438
## 514 1 2 0.8616963438
## 614 1 2 0.8614914652
## 451 1 2 0.8613736916
## 275 1 2 0.8611665683
## 80 1 2 0.8611259050
## 311 1 2 0.8611259050
## 32 1 2 0.8606262688
## 95 1 2 0.8606262688
## 132 1 2 0.8606262688
## 158 1 2 0.8606262688
## 177 1 2 0.8606262688
## 243 1 2 0.8606262688
## 352 1 2 0.8606262688
## 365 1 2 0.8606262688
## 673 1 2 0.8606262688
## 636 1 2 0.8603854181
## 138 1 2 0.8603472343
## 141 1 2 0.8603472343
## 171 1 2 0.8603472343
## 257 1 2 0.8603472343
## 402 1 2 0.8603472343
## 425 1 2 0.8603472343
## 476 1 2 0.8603472343
## 542 1 2 0.8603472343
## 555 1 2 0.8603472343
## 584 1 2 0.8603472343
## 635 1 2 0.8603472343
## 693 1 2 0.8603472343
## 3 1 2 0.8597295721
## 97 1 2 0.8596549595
## 567 1 2 0.8596435545
## 82 1 2 0.8595483469
## 503 1 2 0.8595483469
## 186 1 2 0.8584724310
## 607 1 2 0.8579559096
## 665 1 2 0.8575903159
## 455 1 2 0.8571805774
## 142 1 2 0.8571359501
## 343 1 2 0.8571359501
## 384 1 2 0.8571359501
## 385 1 2 0.8571359501
## 446 1 2 0.8571359501
## 510 1 2 0.8571359501
## 629 1 2 0.8571359501
## 645 1 2 0.8571359501
## 680 1 2 0.8571359501
## 696 1 2 0.8571359501
## 615 1 2 0.8571072345
## 462 1 2 0.8569943925
## 8 1 2 0.8559537371
## 17 1 2 0.8541967870
## 137 1 2 0.8541967870
## 499 1 2 0.8541967870
## 500 1 2 0.8541967870
## 502 1 2 0.8541967870
## 539 1 2 0.8541967870
## 544 1 2 0.8541967870
## 603 1 2 0.8541967870
## 619 1 2 0.8541967870
## 654 1 2 0.8541967870
## 18 1 2 0.8535261110
## 67 1 2 0.8535261110
## 89 1 2 0.8535261110
## 93 1 2 0.8535261110
## 179 1 2 0.8535261110
## 196 1 2 0.8535261110
## 504 1 2 0.8535261110
## 528 1 2 0.8535261110
## 662 1 2 0.8535261110
## 434 1 2 0.8523382499
## 508 1 2 0.8521699749
## 30 1 2 0.8521018546
## 369 1 2 0.8521018546
## 372 1 2 0.8521018546
## 400 1 2 0.8512756965
## 10 1 2 0.8510973572
## 251 1 2 0.8502669368
## 647 1 2 0.8502669368
## 116 1 2 0.8495744988
## 144 1 2 0.8495744988
## 139 1 2 0.8495140200
## 373 1 2 0.8495140200
## 597 1 2 0.8495140200
## 370 1 2 0.8494984586
## 464 1 2 0.8494303338
## 120 1 2 0.8492341859
## 242 1 2 0.8490284868
## 549 1 2 0.8489023528
## 79 1 2 0.8485840499
## 651 1 2 0.8485104505
## 162 1 2 0.8479871364
## 565 1 2 0.8479871364
## 90 1 2 0.8475690136
## 545 1 2 0.8466395445
## 493 1 2 0.8466173631
## 530 1 2 0.8466173631
## 498 1 2 0.8465852707
## 548 1 2 0.8464081015
## 522 1 2 0.8461948283
## 470 1 2 0.8458939651
## 677 1 2 0.8458939651
## 444 1 2 0.8456615040
## 663 1 2 0.8454528677
## 664 1 2 0.8454528677
## 356 1 2 0.8452935109
## 477 1 2 0.8450584370
## 154 1 2 0.8449903837
## 672 1 2 0.8448069175
## 62 1 2 0.8447467683
## 34 1 2 0.8445621986
## 282 1 2 0.8445621986
## 398 1 2 0.8441986945
## 438 1 2 0.8441986945
## 465 1 2 0.8441986945
## 469 1 2 0.8441986945
## 474 1 2 0.8441986945
## 478 1 2 0.8441986945
## 527 1 2 0.8441986945
## 630 1 2 0.8441986945
## 639 1 2 0.8441986945
## 689 1 2 0.8441986945
## 157 1 2 0.8436125027
## 568 1 2 0.8428824291
## 103 1 2 0.8423347404
## 48 1 2 0.8418108292
## 65 1 2 0.8418108292
## 94 1 2 0.8418108292
## 126 1 2 0.8418108292
## 173 1 2 0.8418108292
## 217 1 2 0.8418108292
## 226 1 2 0.8418108292
## 278 1 2 0.8418108292
## 328 1 2 0.8418108292
## 355 1 2 0.8418108292
## 377 1 2 0.8418108292
## 406 1 2 0.8418108292
## 408 1 2 0.8418108292
## 411 1 2 0.8418108292
## 418 1 2 0.8418108292
## 429 1 2 0.8418108292
## 574 1 2 0.8418108292
## 578 1 2 0.8418108292
## 579 1 2 0.8418108292
## 661 1 2 0.8418108292
## 675 1 2 0.8418108292
## 439 1 2 0.8415360355
## 407 1 2 0.8414583981
## 532 1 2 0.8414583981
## 326 1 2 0.8409555861
## 77 1 2 0.8405431593
## 348 1 2 0.8401012686
## 616 1 2 0.8397339851
## 395 1 2 0.8388279601
## 148 1 2 0.8381962962
## 351 1 2 0.8375160238
## 461 1 2 0.8375160238
## 558 1 2 0.8375160238
## 640 1 2 0.8375160238
## 155 1 2 0.8375061091
## 182 1 2 0.8375061091
## 199 1 2 0.8375061091
## 291 1 2 0.8375061091
## 312 1 2 0.8375061091
## 314 1 2 0.8375061091
## 336 1 2 0.8375061091
## 344 1 2 0.8375061091
## 346 1 2 0.8375061091
## 376 1 2 0.8375061091
## 381 1 2 0.8375061091
## 447 1 2 0.8375061091
## 491 1 2 0.8375061091
## 505 1 2 0.8375061091
## 511 1 2 0.8375061091
## 586 1 2 0.8375061091
## 608 1 2 0.8375061091
## 624 1 2 0.8375061091
## 633 1 2 0.8375061091
## 644 1 2 0.8375061091
## 660 1 2 0.8375061091
## 666 1 2 0.8375061091
## 679 1 2 0.8375061091
## 684 1 2 0.8375061091
## 685 1 2 0.8375061091
## 686 1 2 0.8375061091
## 687 1 2 0.8375061091
## 521 1 2 0.8374899427
## 5 1 2 0.8362348944
## 49 1 2 0.8362348944
## 25 1 2 0.8358485413
## 91 1 2 0.8358485413
## 96 1 2 0.8358485413
## 172 1 2 0.8358485413
## 194 1 2 0.8358485413
## 203 1 2 0.8358485413
## 205 1 2 0.8358485413
## 213 1 2 0.8358485413
## 218 1 2 0.8358485413
## 245 1 2 0.8358485413
## 270 1 2 0.8358485413
## 279 1 2 0.8358485413
## 292 1 2 0.8358485413
## 302 1 2 0.8358485413
## 304 1 2 0.8358485413
## 307 1 2 0.8358485413
## 308 1 2 0.8358485413
## 325 1 2 0.8358485413
## 338 1 2 0.8358485413
## 342 1 2 0.8358485413
## 552 1 2 0.8358485413
## 563 1 2 0.8358485413
## 580 1 2 0.8358485413
## 315 1 2 0.8355684892
## 339 1 2 0.8355684892
## 378 1 2 0.8355684892
## 518 1 2 0.8355684892
## 602 1 2 0.8355684892
## 166 1 2 0.8353921766
## 119 1 2 0.8346706316
## 11 1 2 0.8345351367
## 151 1 2 0.8345351367
## 181 1 2 0.8345351367
## 208 1 2 0.8345351367
## 209 1 2 0.8345351367
## 229 1 2 0.8345351367
## 533 1 2 0.8345351367
## 676 1 2 0.8332002280
## 109 1 2 0.8325462988
## 431 1 2 0.8312627153
## 626 1 2 0.8309867170
## 122 1 2 0.8306552048
## 374 1 2 0.8292699725
## 625 1 2 0.8290325748
## 170 1 2 0.8283847036
## 70 1 2 0.8277462109
## 391 1 2 0.8275951925
## 394 1 2 0.8275559259
## 449 1 2 0.8275559259
## 497 1 2 0.8275559259
## 517 1 2 0.8275559259
## 14 1 2 0.8261979172
## 694 1 2 0.8260490036
## 486 1 2 0.8242403048
## 433 1 2 0.8190504846
## 588 1 2 0.8190504846
## 653 1 2 0.8190504846
## 28 1 2 0.8189866610
## 193 1 2 0.8189866610
## 512 1 2 0.8189866610
## 546 1 2 0.8189866610
## 560 1 2 0.8189866610
## 577 1 2 0.8189866610
## 596 1 2 0.8189866610
## 620 1 2 0.8189866610
## 632 1 2 0.8189866610
## 657 1 2 0.8189866610
## 1 1 2 0.8187639671
## 98 1 2 0.8187639671
## 204 1 2 0.8187639671
## 272 1 2 0.8187639671
## 537 1 2 0.8187639671
## 561 1 2 0.8187639671
## 562 1 2 0.8187639671
## 648 1 2 0.8174654413
## 691 1 2 0.8174654413
## 210 1 2 0.8173075575
## 443 1 2 0.8171674135
## 190 1 2 0.8171247159
## 420 1 2 0.8168898477
## 683 1 2 0.8136517335
## 221 1 2 0.8132143718
## 541 1 2 0.8123728198
## 383 1 2 0.8120838008
## 557 1 2 0.8114160193
## 371 1 2 0.8107828321
## 581 1 2 0.8101576760
## 440 1 2 0.8099295643
## 448 1 2 0.8099295643
## 452 1 2 0.8099295643
## 475 1 2 0.8099295643
## 509 1 2 0.8099295643
## 513 1 2 0.8099295643
## 590 1 2 0.8099295643
## 610 1 2 0.8099295643
## 678 1 2 0.8099295643
## 479 1 2 0.8088207917
## 121 1 2 0.8084118960
## 83 1 2 0.8070658721
## 459 1 2 0.8062612101
## 485 1 2 0.8062612101
## 594 1 2 0.8062612101
## 405 1 2 0.8053445886
## 223 1 2 0.8048752066
## 246 1 2 0.8046738808
## 414 1 2 0.8043028508
## 538 1 2 0.8043028508
## 576 1 2 0.8043028508
## 543 1 2 0.8037193155
## 78 1 2 0.8032188775
## 46 1 2 0.8027219940
## 481 1 2 0.8018234326
## 363 1 2 0.8004427103
## 536 1 2 0.8000897917
## 652 1 2 0.7994131055
## 198 1 2 0.7993970376
## 390 1 2 0.7986300935
## 460 1 2 0.7983093294
## 379 1 2 0.7981151112
## 115 1 2 0.7972213475
## 71 1 2 0.7968320128
## 131 1 2 0.7968320128
## 424 1 2 0.7968320128
## 463 1 2 0.7918861238
## 472 1 2 0.7918861238
## 409 1 2 0.7908559266
## 249 1 2 0.7892470907
## 333 1 2 0.7885800561
## 319 1 2 0.7880646813
## 585 1 2 0.7880526794
## 136 1 2 0.7877721322
## 76 1 2 0.7874628385
## 332 1 2 0.7864117581
## 598 1 2 0.7856328637
## 482 1 2 0.7819411473
## 600 1 2 0.7817107492
## 553 1 2 0.7798631857
## 674 1 2 0.7788604263
## 347 1 2 0.7682689411
## 540 1 2 0.7663419252
## 20 1 2 0.7656629767
## 183 1 2 0.7656629767
## 501 1 2 0.7656629767
## 421 1 2 0.7655400206
## 403 1 2 0.7640010041
## 667 1 2 0.7625303838
## 445 1 2 0.7566601281
## 473 1 2 0.7564568767
## 638 1 2 0.7556068736
## 529 1 2 0.7522961123
## 310 1 2 0.7513819023
## 266 1 2 0.7491496351
## 423 1 2 0.7488702007
## 432 1 2 0.7460086541
## 84 1 2 0.7440794294
## 442 1 2 0.7424647956
## 244 1 2 0.7411638827
## 81 1 2 0.7384361978
## 241 1 2 0.7337808782
## 631 1 2 0.7296178630
## 220 1 2 0.7286078712
## 7 1 2 0.7186900700
## 419 1 2 0.7156621123
## 554 1 2 0.7101842819
## 427 1 2 0.7070416903
## 623 1 2 0.7056674138
## 388 1 2 0.7031681798
## 364 1 2 0.6960803376
## 235 1 2 0.6856260871
## 38 1 2 0.6792289177
## 130 1 2 0.6790874080
## 386 1 2 0.6675903962
## 111 1 2 0.6595356819
## 380 1 2 0.6562492450
## 556 1 2 0.6355712201
## 299 1 2 0.5970975794
## 73 1 2 0.5849775996
## 490 1 2 0.5832164256
## 9 1 2 0.5764595550
## 357 1 2 0.5153747247
## 117 1 2 0.5124054852
## 13 1 2 0.5089601493
## 102 1 2 0.5029617385
## 52 1 2 0.4849722813
## 274 1 2 0.4601146000
## 622 1 2 0.4512211463
## 164 1 2 0.4464002224
## 416 1 2 0.4319967536
## 456 1 2 0.3957842657
## 690 1 2 0.3738142671
## 495 1 2 0.3731851735
## 26 1 2 0.3180243896
## 149 1 2 0.2659170141
## 60 1 2 0.2097421035
## 104 1 2 0.1916239230
## 233 1 2 0.1865550700
## 64 1 2 0.1856150300
## 59 1 2 0.1795207504
## 248 1 2 0.1599340015
## 327 1 2 0.1516320647
## 58 1 2 0.1336629588
## 106 1 2 0.0845487305
## 43 2 1 0.5987976297
## 254 2 1 0.5987976297
## 37 2 1 0.5962998735
## 201 2 1 0.5949290659
## 417 2 1 0.5946926844
## 216 2 1 0.5946001883
## 232 2 1 0.5944551344
## 572 2 1 0.5943042542
## 458 2 1 0.5936584445
## 45 2 1 0.5904133905
## 697 2 1 0.5887598620
## 466 2 1 0.5884029821
## 413 2 1 0.5871720643
## 222 2 1 0.5864513724
## 637 2 1 0.5858514014
## 345 2 1 0.5856425032
## 219 2 1 0.5819725251
## 659 2 1 0.5819212411
## 74 2 1 0.5816219278
## 212 2 1 0.5816124846
## 153 2 1 0.5813879940
## 263 2 1 0.5787679799
## 570 2 1 0.5768854318
## 484 2 1 0.5766975713
## 176 2 1 0.5751354174
## 682 2 1 0.5743806419
## 100 2 1 0.5740754248
## 571 2 1 0.5724473926
## 247 2 1 0.5708742633
## 583 2 1 0.5684196967
## 15 2 1 0.5665718247
## 192 2 1 0.5664463508
## 225 2 1 0.5664409029
## 191 2 1 0.5656350595
## 457 2 1 0.5653768559
## 494 2 1 0.5651090760
## 488 2 1 0.5646286480
## 301 2 1 0.5642068742
## 202 2 1 0.5638656777
## 318 2 1 0.5635939657
## 422 2 1 0.5627643456
## 228 2 1 0.5611486500
## 133 2 1 0.5600782630
## 264 2 1 0.5599759635
## 515 2 1 0.5589784190
## 114 2 1 0.5587007822
## 670 2 1 0.5578858032
## 261 2 1 0.5575466818
## 382 2 1 0.5570942793
## 606 2 1 0.5553121617
## 6 2 1 0.5536401369
## 582 2 1 0.5505628979
## 167 2 1 0.5492655411
## 184 2 1 0.5468388191
## 161 2 1 0.5450981049
## 309 2 1 0.5442388166
## 368 2 1 0.5439628322
## 230 2 1 0.5426960260
## 450 2 1 0.5423952220
## 125 2 1 0.5417546434
## 468 2 1 0.5417483614
## 214 2 1 0.5416865177
## 547 2 1 0.5412183582
## 227 2 1 0.5409324350
## 22 2 1 0.5406775105
## 286 2 1 0.5404851872
## 392 2 1 0.5370104102
## 123 2 1 0.5363458076
## 612 2 1 0.5334757193
## 550 2 1 0.5322729890
## 178 2 1 0.5309842580
## 215 2 1 0.5309591321
## 63 2 1 0.5265893371
## 255 2 1 0.5265893371
## 359 2 1 0.5254209405
## 290 2 1 0.5245915385
## 262 2 1 0.5242723141
## 265 2 1 0.5242607601
## 589 2 1 0.5238117547
## 206 2 1 0.5234910463
## 211 2 1 0.5224602493
## 55 2 1 0.5222160866
## 609 2 1 0.5211363937
## 33 2 1 0.5210679463
## 587 2 1 0.5191617701
## 160 2 1 0.5168889764
## 296 2 1 0.5114483845
## 113 2 1 0.5107805876
## 107 2 1 0.5065360084
## 189 2 1 0.5062290974
## 426 2 1 0.5048903328
## 566 2 1 0.5035068954
## 441 2 1 0.5023578899
## 207 2 1 0.5023141087
## 467 2 1 0.5019828683
## 197 2 1 0.4986848475
## 516 2 1 0.4976340568
## 238 2 1 0.4970682807
## 524 2 1 0.4969274775
## 252 2 1 0.4967983943
## 313 2 1 0.4961071646
## 681 2 1 0.4960008821
## 575 2 1 0.4946486100
## 692 2 1 0.4937875818
## 285 2 1 0.4920981397
## 611 2 1 0.4911861355
## 306 2 1 0.4897163496
## 57 2 1 0.4872092768
## 480 2 1 0.4861595251
## 127 2 1 0.4842157582
## 627 2 1 0.4818009239
## 454 2 1 0.4815024344
## 88 2 1 0.4786305452
## 110 2 1 0.4746721681
## 150 2 1 0.4744501019
## 671 2 1 0.4726793918
## 699 2 1 0.4709653400
## 354 2 1 0.4706248162
## 19 2 1 0.4699301791
## 367 2 1 0.4684535714
## 50 2 1 0.4680005863
## 634 2 1 0.4679249077
## 507 2 1 0.4642472704
## 174 2 1 0.4638791419
## 435 2 1 0.4633501933
## 428 2 1 0.4618898643
## 358 2 1 0.4579246459
## 492 2 1 0.4521172430
## 300 2 1 0.4513353322
## 531 2 1 0.4496760533
## 329 2 1 0.4429948331
## 21 2 1 0.4415310741
## 267 2 1 0.4326393467
## 520 2 1 0.4316598543
## 105 2 1 0.4311583089
## 401 2 1 0.4264591489
## 362 2 1 0.4245070828
## 437 2 1 0.4216987155
## 69 2 1 0.4210891665
## 231 2 1 0.4201672628
## 604 2 1 0.4193797854
## 613 2 1 0.4185451409
## 698 2 1 0.4168146044
## 108 2 1 0.4167310618
## 85 2 1 0.4137006604
## 591 2 1 0.4123113938
## 101 2 1 0.4113971078
## 330 2 1 0.4084910536
## 54 2 1 0.4066440734
## 47 2 1 0.4054600572
## 188 2 1 0.4019267354
## 415 2 1 0.3972628708
## 303 2 1 0.3968049173
## 187 2 1 0.3928384051
## 68 2 1 0.3898083353
## 483 2 1 0.3892891893
## 185 2 1 0.3879610166
## 337 2 1 0.3872636623
## 361 2 1 0.3846439706
## 287 2 1 0.3840907246
## 239 2 1 0.3817356058
## 118 2 1 0.3784418282
## 269 2 1 0.3781720567
## 321 2 1 0.3732122804
## 592 2 1 0.3723667452
## 271 2 1 0.3676500518
## 335 2 1 0.3587238302
## 595 2 1 0.3585537492
## 436 2 1 0.3585359911
## 649 2 1 0.3572560635
## 224 2 1 0.3529320630
## 669 2 1 0.3447604805
## 237 2 1 0.3382847942
## 39 2 1 0.3376495914
## 605 2 1 0.3339627652
## 234 2 1 0.3216442924
## 175 2 1 0.3185091326
## 72 2 1 0.3070781630
## 53 2 1 0.3032924942
## 305 2 1 0.3016720793
## 168 2 1 0.2943736047
## 256 2 1 0.2939902550
## 56 2 1 0.2939698287
## 334 2 1 0.2937995265
## 387 2 1 0.2874585030
## 99 2 1 0.2858784541
## 86 2 1 0.2764852959
## 129 2 1 0.2696439131
## 124 2 1 0.2694985956
## 523 2 1 0.2643382111
## 340 2 1 0.2544588614
## 324 2 1 0.2534463530
## 260 2 1 0.2449623757
## 4 2 1 0.2444096130
## 2 2 1 0.2308609315
## 152 2 1 0.2238438310
## 143 2 1 0.2219817593
## 341 2 1 0.2145759351
## 280 2 1 0.2099558139
## 360 2 1 0.2018452122
## 283 2 1 0.1968699736
## 569 2 1 0.1927491234
## 44 2 1 0.1810042373
## 317 2 1 0.1778812304
## 240 2 1 0.1774251040
## 331 2 1 0.1658867289
## 284 2 1 0.1649062320
## 294 2 1 0.1584501710
## 112 2 1 0.1538365808
## 593 2 1 0.1480211115
## 40 2 1 0.1471546533
## 320 2 1 0.1454627122
## 253 2 1 0.1185896526
## 61 2 1 0.1131561632
## 350 2 1 0.1032242429
## 156 2 1 0.0827905599
## 87 2 1 0.0650241179
## 16 2 1 0.0315248145
## 42 2 1 0.0067823609
## 66 2 1 0.0009586036
## 353 2 1 -0.0002461887
## 489 2 1 -0.0053464846
## 75 2 1 -0.0769056348
## 297 2 1 -0.0770290314
## 51 2 1 -0.0821755494
## 289 2 1 -0.0901935806
## 268 2 1 -0.1032264982
## 273 2 1 -0.1032264982
## 658 2 1 -0.1152765773
## 349 2 1 -0.1245754866
## 180 2 1 -0.1565293712
## 147 2 1 -0.1614096874
## Average silhouette width per cluster:
## [1] 0.8024334 0.4101654
## Average silhouette width of total data set:
## [1] 0.6691886
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
# Silhouette diagnostics for the 2-cluster PAM solution on the PCA features
pca.cluster.labels <- pca.pam2[["clustering"]]
pca.sil.pam2 <- silhouette(
  pca.cluster.labels,
  dist(pca.features)
)
fviz_silhouette(pca.sil.pam2, xlab = "PAM") # silhouette information
## cluster size ave.sil.width
## 1 1 451 0.80
## 2 2 232 0.41

# Visualization of cluster analysis results (3D scatterplot)
# First three PCA dimensions, coloured by PAM cluster.
plot3d(pca.features$bcw.after.pca.x...1., pca.features$bcw.after.pca.x...2.,
  pca.features$bcw.after.pca.x...3.,
  col = pca.cluster.labels,
  # NOTE(review): pch may have no visible effect with type = "s" -- confirm
  pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
  xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# legend() takes one pch/col value per entry, so pass one colour per cluster
# instead of the per-observation vectors (only their first entries were used).
legend3d("topright",
  legend = paste("cluster", sort(unique(pca.cluster.labels))),
  pch = 16, col = sort(unique(pca.cluster.labels)), cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')
## CLARA
# compute CLARA FOR ALL FEATURES
# Two clusters, 200 sub-samples, using the PAM-like swap phase
bcw.clara <- clara(
  x = bcw.features,
  k = 2,
  samples = 200,
  pamLike = TRUE
)
print(bcw.clara)
## Call: clara(x = bcw.features, k = 2, samples = 200, pamLike = TRUE)
## Medoids:
## clump_thickness uniformity_cell_size uniformity_cell_shape
## 646 3 1 1
## 468 6 6 6
## marginal_adhesion single_epithelial_cell_size bare_nuclei bland_chromatin
## 646 1 2 1 2
## 468 5 4 10 7
## normal_nucleoli mitoses
## 646 1 1
## 468 6 2
## Objective function: 4.52286
## Clustering vector: Named int [1:683] 1 2 1 2 1 2 1 1 1 1 1 1 1 1 2 1 1 1 ...
## - attr(*, "names")= chr [1:683] "1" "2" "3" "4" "5" "6" "7" ...
## Cluster sizes: 454 229
## Best sample:
## [1] 16 19 30 45 98 114 121 132 134 136 142 145 200 201 217 241 326 338 345
## [20] 346 352 396 397 405 416 451 468 476 477 500 505 514 540 542 543 576 582 611
## [39] 646 676 682 685 687 697
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
# Cluster memberships from the CLARA fit. The component is named "clustering";
# spell it out in full instead of relying on `$` partial matching of "cluster".
bcw.clara.clust <- bcw.clara$clustering
# Silhouette widths of the CLARA partition on the original feature distances
bcw.sil.clara <- silhouette(bcw.clara.clust, dist(bcw.features))
fviz_silhouette(bcw.sil.clara, xlab = "CLARA")
## cluster size ave.sil.width
## 1 1 454 0.76
## 2 2 229 0.28

# Visualization of cluster analysis results (3D scatterplot)
# Points are coloured by CLARA cluster. All three axes are read from
# bcw.features so every coordinate comes from the data that was clustered.
plot3d(bcw.features$uniformity_cell_size, bcw.features$uniformity_cell_shape,
  bcw.features$clump_thickness,
  col = bcw.clara.clust,
  # NOTE(review): pch may have no visible effect with type = "s" -- confirm
  pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
  xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
  zlab = "clump thickness")
# legend() takes one pch/col value per entry, so pass one colour per cluster
# instead of the per-observation vectors (only their first entries were used).
legend3d("topright",
  legend = paste("cluster", sort(unique(bcw.clara.clust))),
  pch = 16, col = sort(unique(bcw.clara.clust)), cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')
# compute CLARA FOR PCA FEATURES
# Two clusters, 200 sub-samples, using the PAM-like swap phase
pca.clara <- clara(
  x = pca.features,
  k = 2,
  samples = 200,
  pamLike = TRUE
)
print(pca.clara)
## Call: clara(x = pca.features, k = 2, samples = 200, pamLike = TRUE)
## Medoids:
## bcw.after.pca.x...1. bcw.after.pca.x...2. bcw.after.pca.x...3.
## 134 1.714471 0.09986549 -0.006010226
## 659 -3.115058 -0.50006703 0.011453841
## Objective function: 1.118681
## Clustering vector: Named int [1:683] 1 2 1 2 1 2 1 1 1 1 1 1 1 1 2 2 1 1 ...
## - attr(*, "names")= chr [1:683] "1" "2" "3" "4" "5" "6" "7" ...
## Cluster sizes: 451 232
## Best sample:
## [1] 18 20 23 39 40 112 113 119 134 150 157 161 166 190 191 205 230 235 271
## [20] 302 321 325 327 377 395 397 418 439 474 485 504 507 509 512 516 539 575 586
## [39] 623 639 659 681 694 696
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
# Cluster memberships from the CLARA fit. The component is named "clustering";
# spell it out in full instead of relying on `$` partial matching of "cluster".
pca.clara.clust <- pca.clara$clustering
# Silhouette widths of the CLARA partition on the PCA feature distances
pca.sil.clara <- silhouette(pca.clara.clust, dist(pca.features))
fviz_silhouette(pca.sil.clara, xlab = "CLARA")
## cluster size ave.sil.width
## 1 1 451 0.80
## 2 2 232 0.41

# Visualization of cluster analysis results (3D scatterplot)
# First three PCA dimensions, coloured by CLARA cluster.
plot3d(pca.features$bcw.after.pca.x...1., pca.features$bcw.after.pca.x...2.,
  pca.features$bcw.after.pca.x...3.,
  col = pca.clara.clust,
  # NOTE(review): pch may have no visible effect with type = "s" -- confirm
  pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
  xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# legend() takes one pch/col value per entry, so pass one colour per cluster
# instead of the per-observation vectors (only their first entries were used).
legend3d("topright",
  legend = paste("cluster", sort(unique(pca.clara.clust))),
  pch = 16, col = sort(unique(pca.clara.clust)), cex = 1, inset = c(0.02))
#snapshot3d(filename = '3dplot.png', fmt = 'png')
## AGNES
## FOR ALL FEATURES
# Candidate linkage methods; the names label each coefficient in the result
m <- c(average = "average", single = "single", complete = "complete")
# Agglomerative coefficient of AGNES on the original features for linkage `x`
ac <- function(x) agnes(bcw.features, method = x)[["ac"]]
# One coefficient per linkage method
map_dbl(m, ac)
## average single complete
## 0.8717631 0.7973720 0.9189242
# Fit AGNES with average linkage on the original features and draw its tree
bcw.agnes.avg <- agnes(bcw.features, method = "average")
pltree(
  bcw.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage",
  cex = 0.6,
  hang = -1
)

# Cutting off at k=2, then showing the resulting memberships
bcw.agnes.avg.k2 <- cutree(bcw.agnes.avg, k = 2)
bcw.agnes.avg.k2
## [1] 1 2 1 2 1 2 2 1 1 1 1 1 1 1 2 1 1 1 2 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1
## [38] 2 2 1 2 1 2 1 2 1 1 2 1 1 2 2 2 1 2 1 2 1 1 1 2 1 1 1 1 2 2 1 1 2 1 2 1 1
## [75] 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 2 1 2 2 1 2 1 2 2
## [112] 2 1 1 1 2 1 1 1 1 2 2 2 1 2 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 2
## [149] 2 1 1 2 1 1 2 2 1 1 1 1 2 2 1 1 1 1 1 2 2 2 1 2 1 2 1 1 1 2 2 1 2 2 2 1 2
## [186] 2 1 1 1 1 2 1 1 1 2 2 1 1 1 2 2 1 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2 2 1 2 2
## [223] 1 2 2 2 1 2 1 2 2 2 2 1 1 1 1 1 1 2 2 1 1 2 2 2 2 2 1 1 1 2 2 2 2 2 1 1 2
## [260] 2 2 1 2 1 2 1 1 1 1 1 1 1 1 2 2 2 2 2 1 1 2 1 1 2 2 1 1 2 2 1 2 1 2 2 1 1
## [297] 2 1 1 1 1 1 1 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 2 2 1 2 1 1 2 2 1 1 1 2 1 1
## [334] 1 1 2 1 1 1 2 1 1 1 2 2 1 2 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
## [371] 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2
## [408] 1 1 1 2 1 2 1 1 1 1 1 1 2 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 2 2 1
## [445] 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 1 1 2 1 1 1 2 1 2 1 1
## [482] 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 2 1 1
## [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2
## [556] 2 2 1 1 2 1 1 1 1 1 1 2 2 1 1 1 2 1 2 1 2 2 2 1 2 1 1 1 1 1 1 1 1 2 2 2 1
## [593] 1 2 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1
## [630] 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 2 2
## [667] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 2 2
# Cluster sizes of the 2-cluster AGNES solution
table(bcw.agnes.avg.k2)
## bcw.agnes.avg.k2
## 1 2
## 467 216
# Silhouette widths of the AGNES partition on the original feature distances
bcw.sil.agnes <- silhouette(
  bcw.agnes.avg.k2,
  dist(bcw.features)
)
fviz_silhouette(bcw.sil.agnes, xlab = "AGNES")
## cluster size ave.sil.width
## 1 1 467 0.73
## 2 2 216 0.29

# standard dendrogram
fviz_dend(
  bcw.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage",
  cex = 0.4
)

# clustered dendrogram
fviz_dend(bcw.agnes.avg, cex = 0.4, k = 2)

# circular dendrogram
fviz_dend(
  bcw.agnes.avg,
  k = 2,
  type = "circular",
  main = "Dendrogram of AGNES with Average Linkage",
  cex = 0.4
)

# Visualization of cluster analysis results (3D scatterplot)
# Points are coloured by AGNES cluster. All three axes are read from
# bcw.features so every coordinate comes from the data that was clustered.
plot3d(bcw.features$uniformity_cell_size, bcw.features$uniformity_cell_shape,
  bcw.features$clump_thickness,
  col = bcw.agnes.avg.k2,
  # NOTE(review): pch may have no visible effect with type = "s" -- confirm
  pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
  xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
  zlab = "clump thickness")
# legend() takes one pch/col value per entry, so pass one colour per cluster
# instead of the per-observation vectors (only their first entries were used).
legend3d("topright",
  legend = paste("cluster", sort(unique(bcw.agnes.avg.k2))),
  pch = 16, col = sort(unique(bcw.agnes.avg.k2)), cex = 1, inset = c(0.02))
## FOR PCA FEATURES
# Candidate linkage methods; the names label each coefficient in the result
m <- c(average = "average", single = "single", complete = "complete")
# Agglomerative coefficient of AGNES on the PCA features for linkage `x`
ac <- function(x) agnes(pca.features, method = x)[["ac"]]
# One coefficient per linkage method
map_dbl(m, ac)
## average single complete
## 0.9692076 0.9015951 0.9790112
# Fit AGNES with average linkage on the PCA features and draw its tree
pca.agnes.avg <- agnes(pca.features, method = "average")
pltree(
  pca.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage - PCA",
  cex = 0.6,
  hang = -1
)

# Cutting off at k=2, then showing the resulting memberships
pca.agnes.avg.k2 <- cutree(pca.agnes.avg, k = 2)
pca.agnes.avg.k2
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
## [149] 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
## [186] 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [223] 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
## [260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [334] 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1
## [482] 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [593] 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1
## [630] 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
## [667] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Cluster sizes of the 2-cluster AGNES solution on the PCA features
table(pca.agnes.avg.k2)
## pca.agnes.avg.k2
## 1 2
## 650 33
# Silhouette widths of the AGNES partition on the PCA feature distances
pca.sil.agnes <- silhouette(
  pca.agnes.avg.k2,
  dist(pca.features)
)
fviz_silhouette(pca.sil.agnes, xlab = "AGNES")
## cluster size ave.sil.width
## 1 1 650 0.54
## 2 2 33 0.60

# standard dendrogram
fviz_dend(
  pca.agnes.avg,
  main = "Dendrogram of AGNES with Average Linkage - PCA",
  cex = 0.4
)

# clustered dendrogram
fviz_dend(pca.agnes.avg, cex = 0.4, k = 2)

# circular dendrogram
fviz_dend(
  pca.agnes.avg,
  k = 2,
  type = "circular",
  main = "Dendrogram of AGNES with Average Linkage - PCA",
  cex = 0.4
)

# Visualization of cluster analysis results (3D scatterplot)
# First three PCA dimensions, coloured by AGNES cluster.
plot3d(pca.features$bcw.after.pca.x...1., pca.features$bcw.after.pca.x...2.,
  pca.features$bcw.after.pca.x...3.,
  col = pca.agnes.avg.k2,
  # NOTE(review): pch may have no visible effect with type = "s" -- confirm
  pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
  xlab = "Dim_1", ylab = "Dim_2", zlab = "Dim_3")
# legend() takes one pch/col value per entry, so pass one colour per cluster
# instead of the per-observation vectors (only their first entries were used).
legend3d("topright",
  legend = paste("cluster", sort(unique(pca.agnes.avg.k2))),
  pch = 16, col = sort(unique(pca.agnes.avg.k2)), cex = 1, inset = c(0.02))
## DIANA
# compute divisive hierarchical clustering FOR ALL FEATURES
bcw.diana <- diana(bcw.features)
# Divisive coefficient of the fit
bcw.diana[["dc"]]
## [1] 0.9092248
# Dendrogram with the k = 2 split outlined
pltree(
  bcw.diana,
  main = "Dendrogram of diana",
  cex = 0.6,
  hang = -1
)
rect.hclust(bcw.diana, k = 2, border = 2:10)

# Cut into two clusters and inspect their silhouette widths
bcw.diana.clust <- cutree(bcw.diana, k = 2)
bcw.sil.diana <- silhouette(
  bcw.diana.clust,
  dist(bcw.features)
)
fviz_silhouette(bcw.sil.diana, xlab = "DIANA")
## cluster size ave.sil.width
## 1 1 472 0.72
## 2 2 211 0.31

# Cluster plot of the DIANA partition on the original features
fviz_cluster(
  list(
    data = bcw.features,
    cluster = bcw.diana.clust
  )
)

# Dendrogram cut into two clusters
fviz_dend(bcw.diana, cex = 0.4, k = 2)

# Same tree in circular layout
fviz_dend(bcw.diana, k = 2, type = "circular", cex = 0.4)

# Visualization of cluster analysis results (3D scatterplot)
# Points are coloured by DIANA cluster. All three axes are read from
# bcw.features so every coordinate comes from the data that was clustered.
plot3d(bcw.features$uniformity_cell_size, bcw.features$uniformity_cell_shape,
  bcw.features$clump_thickness,
  col = bcw.diana.clust,
  # NOTE(review): pch may have no visible effect with type = "s" -- confirm
  pch = as.numeric(bcw.real.class.labels), size = 1, type = "s",
  xlab = "uniformity_cell_size", ylab = "uniformity_cell_shape",
  zlab = "clump thickness")
# legend() takes one pch/col value per entry, so pass one colour per cluster
# instead of the per-observation vectors (only their first entries were used).
legend3d("topright",
  legend = paste("cluster", sort(unique(bcw.diana.clust))),
  pch = 16, col = sort(unique(bcw.diana.clust)), cex = 1, inset = c(0.02))
# compute divisive hierarchical clustering FOR PCA FEATURES
pca.diana <- diana(pca.features)
# Divisive coefficient of the fit
pca.diana[["dc"]]
## [1] 0.9757748
# Dendrogram with the k = 2 split outlined
pltree(
  pca.diana,
  main = "Dendrogram of diana",
  cex = 0.6,
  hang = -1
)
rect.hclust(pca.diana, k = 2, border = 2:10)

# Cut into two clusters and inspect their silhouette widths
pca.diana.clust <- cutree(pca.diana, k = 2)
pca.sil.diana <- silhouette(
  pca.diana.clust,
  dist(pca.features)
)
fviz_silhouette(pca.sil.diana, xlab = "DIANA")
## cluster size ave.sil.width
## 1 1 466 0.77
## 2 2 217 0.44

#fviz_cluster(list(data = pca.features, cluster = pca.diana.clust))
fviz_dend(pca.diana, k=2, cex=0.4)

fviz_dend(pca.agnes.avg, type="circular", cex=0.4, k=2, main="Dendrogram of AGNES with Average Linkage - PCA")

# Visualization of cluster analysis results (3D scatterplot)
plot3d(pca.features$bcw.after.pca.x...1., pca.features$bcw.after.pca.x...2., pca.features$bcw.after.pca.x...3., col=pca.diana.clust,
pch=as.numeric(bcw.real.class.labels), size = 1, type='s', xlab="Dim_1",
ylab="Dim_2", zlab="Dim_3")
legend3d("topright", legend = c("malignant", "benign"), pch = as.numeric(bcw.real.class.labels),
col = pca.diana.clust, cex=1, inset=c(0.02))
## External Cluster Validation
library(e1071)
library("fpc")
# Label vectors produced by each method: the first five were fitted on all
# features, the last five on the PCA features.
clust.results <- list(
  bcw.kmeans.labels, bcw.cluster.labels, bcw.agnes.avg.k2,
  bcw.diana.clust, bcw.clara.clust,
  pca.kmeans.labels, pca.cluster.labels, pca.agnes.avg.k2,
  pca.diana.clust, pca.clara.clust
)
# One agreement score per method, filled in by the loop below
partition.agreement <- numeric(length(clust.results))
for (j in seq_along(clust.results)) {
  method.labels <- clust.results[[j]]
  # Called for its printed matching report only; the return value is unused
  matchClasses(table(method.labels, bcw.real.class.labels), method = "exact")
  # $diag of the matched comparison against the real class labels
  part.agreement <- compareMatchedClasses(
    method.labels, bcw.real.class.labels,
    method = "exact"
  )$diag
  print(part.agreement)
  partition.agreement[j] <- round(part.agreement, 3)
}
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
## [,1]
## [1,] 0.9604685
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 95.9 %
## [,1]
## [1,] 0.9590044
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 94.29 %
## [,1]
## [1,] 0.942899
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 94.14 %
## [,1]
## [1,] 0.9414348
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 95.9 %
## [,1]
## [1,] 0.9590044
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
## [,1]
## [1,] 0.9604685
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
## [,1]
## [1,] 0.9604685
## Direct agreement: 1 of 2 pairs
## Iterations for permutation matching: 1
## Cases in matched pairs: 69.84 %
## [,1]
## [1,] 0.6983895
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 94.73 %
## [,1]
## [1,] 0.9472914
## Direct agreement: 2 of 2 pairs
## Cases in matched pairs: 96.05 %
## [,1]
## [1,] 0.9604685
# Pair every method name with its rounded partition agreement score
pat.agree.val <- data.frame(
  Methods = c(
    "KMeans", "PAM", "AGNES", "DIANA", "CLARA",
    "KMeans_PCA", "PAM_PCA", "AGNES_PCA", "DIANA_PCA", "CLARA_PCA"
  ),
  Partition_Agreement = partition.agreement
)
# Bar chart of the agreement per method, with the value printed above each bar
ggplot(
  data = pat.agree.val,
  mapping = aes(x = Methods, y = Partition_Agreement, fill = Methods)
) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = Partition_Agreement), size = 3.5, vjust = -0.3)

##PARTITION AGREEMENT: REAL LABELS AND MODEL LABELS COMPARED
# Builds three summaries over the ten clustering results:
#   pat.res.matrix      - matched partition agreement between every pair of
#                         methods; the diagonal holds each method's agreement
#                         with the real class labels
#   acc.res.vector      - accuracy of each method against the real labels
#   pat.resprand.matrix - corrected Rand index of each method against the
#                         real labels
clust.resultsss <- data.frame(bcw.kmeans.labels, bcw.cluster.labels, bcw.agnes.avg.k2,
                              bcw.diana.clust, bcw.clara.clust,
                              pca.kmeans.labels, pca.cluster.labels, pca.agnes.avg.k2,
                              pca.diana.clust, pca.clara.clust)
pat.res.matrix <- matrix(0, nrow = length(clust.results), ncol = length(clust.results))
colnames(pat.res.matrix) <- c("KMeans", "PAM", "AGNES", "DIANA", "CLARA", "KMeans_PCA", "PAM_PCA",
                              "AGNES_PCA", "DIANA_PCA", "CLARA_PCA")
rownames(pat.res.matrix) <- colnames(pat.res.matrix)
pat.resprand.matrix <- matrix(0, nrow = length(clust.results), ncol = 1) #FOR THE RAND INDEX
acc.res.vector <- matrix(0, nrow = length(clust.results), ncol = 1) #ACCURACY
species <- as.numeric(bcw.real.class.labels)
# Distance matrices are loop-invariant, so compute each one once. Columns 1-5
# of clust.resultsss were fitted on the original features and columns 6-10 on
# the PCA features (the two matrices used to be swapped between those groups).
extval.dist.bcw <- dist(bcw.features)
extval.dist.pca <- dist(pca.features)
for (i in seq_along(clust.results)) {
  for (j in seq_along(clust.results)) {
    if (i == j) {
      # Diagonal: compare the method with the real class labels
      part.agreement <- compareMatchedClasses(clust.resultsss[, i], bcw.real.class.labels,
                                              method = "exact")$diag
    } else {
      # Off-diagonal: compare method i with method j
      part.agreement <- compareMatchedClasses(clust.resultsss[, i], clust.resultsss[, j],
                                              method = "exact")$diag
    }
    pat.res.matrix[i, j] <- round(part.agreement, 3)
  }
  # Cluster numbers are arbitrary, so a method may call "benign" cluster 2
  # instead of 1. With two groups, the accuracy under the swapped numbering is
  # 1 - accuracy, so take the better of the two instead of hard-coding a flip
  # for one particular method.
  accuracy <- mean(species == clust.resultsss[, i])
  accuracy <- max(accuracy, 1 - accuracy)
  acc.res.vector[i] <- round(accuracy, 3)
  # Corrected Rand index between the real labels and method i
  if (i < 6) {
    clust_stats <- cluster.stats(d = extval.dist.bcw, species, clust.resultsss[, i])
  } else {
    clust_stats <- cluster.stats(d = extval.dist.pca, species, clust.resultsss[, i])
  }
  pat.resprand.matrix[i] <- round(clust_stats$corrected.rand, 3)
}
library(pheatmap)
pheatmap(pat.res.matrix, display_numbers = TRUE, cluster_rows = FALSE, cluster_cols = FALSE,
         number_format = "%.3f")

pat.res.matrix #PARTITION AGREEMENT (the corrected Rand index is in pat.resprand.matrix)
## KMeans PAM AGNES DIANA CLARA KMeans_PCA PAM_PCA AGNES_PCA
## KMeans 0.960 0.996 0.977 0.972 0.996 0.982 0.988 0.712
## PAM 0.996 0.959 0.978 0.974 1.000 0.978 0.984 0.713
## AGNES 0.977 0.978 0.943 0.972 0.978 0.959 0.965 0.732
## DIANA 0.972 0.974 0.972 0.941 0.974 0.969 0.969 0.739
## CLARA 0.996 1.000 0.978 0.974 0.959 0.978 0.984 0.713
## KMeans_PCA 0.982 0.978 0.959 0.969 0.978 0.960 0.994 0.709
## PAM_PCA 0.988 0.984 0.965 0.969 0.984 0.994 0.960 0.709
## AGNES_PCA 0.712 0.713 0.732 0.739 0.713 0.709 0.709 0.698
## DIANA_PCA 0.981 0.980 0.975 0.991 0.980 0.978 0.978 0.731
## CLARA_PCA 0.988 0.984 0.965 0.969 0.984 0.994 1.000 0.709
## DIANA_PCA CLARA_PCA
## KMeans 0.981 0.988
## PAM 0.980 0.984
## AGNES 0.975 0.965
## DIANA 0.991 0.969
## CLARA 0.980 0.984
## KMeans_PCA 0.978 0.994
## PAM_PCA 0.978 1.000
## AGNES_PCA 0.731 0.709
## DIANA_PCA 0.947 0.978
## CLARA_PCA 0.978 0.960
acc.res.vector #ACCURACY MEASURE
# NOTE(review): rows 1 and 6 below were re-derived from the diagonal of
# pat.res.matrix after the accuracy became label-invariant -- re-run to confirm.
## [,1]
## [1,] 0.960
## [2,] 0.959
## [3,] 0.943
## [4,] 0.941
## [5,] 0.959
## [6,] 0.960
## [7,] 0.960
## [8,] 0.698
## [9,] 0.947
## [10,] 0.960